Seasonality & Smoothing

Seasonal Decomposition

# --- Seasonal decomposition of NSA vehicle sales (Quandl FRED mirror) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
ticker="FRED/TOTALNSA"   # total US vehicle sales, not seasonally adjusted
startdate<-"1980-01-01"
library(Quandl)
# API key is kept out of source control in a one-column csv (column "Key")
quandl_api_key<-read.csv("../data/quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(quandl_api_key)
VehicleNSA <- Quandl(ticker,start_date=startdate,type="ts") # type="ts" so decompose() accepts it
plot(VehicleNSA)

# Classical moving-average decomposition into trend / seasonal / random parts
decomp<-decompose(VehicleNSA)
plot(decomp)

Seasonal Adjustment

# --- Seasonal adjustment: subtract the estimated seasonal component ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
ticker="FRED/TOTALNSA"   # total US vehicle sales, not seasonally adjusted
startdate<-"1980-01-01"
library(Quandl)
# Authenticate with Quandl using the same key file as the decomposition
# chunk. The original loaded a FRED key via fredr here but never used it,
# while the Quandl request itself ran unauthenticated.
quandl_api_key<-read.csv("../data/quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(quandl_api_key)
VehicleNSA=Quandl(ticker,start_date=startdate,type="ts")
plot(VehicleNSA)

# Seasonally adjust by removing the (additive) seasonal component
decomp<-decompose(VehicleNSA)
VehicleSA<-decomp$x-decomp$seasonal
# Plot NSA vs SA on one chart (xts gives legend support in plot())
Vehicle<-cbind(as.xts(VehicleNSA),as.xts(VehicleSA))
colnames(Vehicle)<-c("NSA","SA")
plot(Vehicle,legend.loc = "topleft")

Simple Moving Average

# --- Smoothing with a 12-month simple moving average ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
ticker="FRED/TOTALNSA"   # total US vehicle sales, not seasonally adjusted
startdate<-"1980-01-01"
library(Quandl)
# Authenticate with Quandl (same key file as the decomposition chunk). The
# original loaded an unused FRED key via fredr and left Quandl unauthenticated.
quandl_api_key<-read.csv("../data/quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(quandl_api_key)
VehicleNSA=Quandl(ticker,start_date=startdate,type="ts")
library(TTR)
# A 12-month window averages out the annual seasonal cycle
Smoothed<-SMA(VehicleNSA,n=12)
toplot<-cbind(as.xts(VehicleNSA),as.xts(Smoothed))
colnames(toplot)<-c("NSA","Smoothed")
plot(toplot,legend.loc="topleft")

Regression

Univariate Regression

# --- Univariate regression: DAX daily log returns on CAC ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets (daily DAX/SMI/CAC/FTSE closes) ships with base R's
# 'datasets' package, so no extra library is required here.
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE) as the original did
# Daily log returns log(P_t/P_{t-1}). Note 1:(n_obs-1): the original's
# 1:T-1 parsed as 0:(T-1) and only worked because a 0 index is dropped.
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044676 -0.004302 -0.000076  0.004093  0.040100 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003523  0.0001623    2.17   0.0301 *  
## CAC         0.6858248  0.0147070   46.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared:  0.5394, Adjusted R-squared:  0.5391 
## F-statistic:  2175 on 1 and 1857 DF,  p-value: < 2.2e-16

Multivariate Regression

# --- Multivariate regression: DAX on CAC and FTSE ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.046142 -0.003848 -0.000009  0.003987  0.033370 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002694  0.0001542   1.747   0.0807 .  
## CAC         0.5152838  0.0183380  28.099   <2e-16 ***
## FTSE        0.3644971  0.0254199  14.339   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared:  0.5853, Adjusted R-squared:  0.5849 
## F-statistic:  1310 on 2 and 1856 DF,  p-value: < 2.2e-16

Regress w/Interactions

# --- Regression with interaction terms ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC:FTSE,Returns) # interaction term only (intercept + CAC:FTSE)
reg2<-lm(DAX~CAC*FTSE,Returns) # full factorial: CAC + FTSE + CAC:FTSE

Regress w/Calendar Dummy

# --- Regression with a calendar (quarter) dummy on daily stock returns ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library("tidyquant") 
Tickers = c('MSFT','GE') # asset ticker
start_date = '2017-01-01' # data start date
end_date = '2021-03-01' # data end date
data_src = 'yahoo' # data source
# getSymbols() assigns one xts object per ticker into the workspace
getSymbols(Tickers,from = start_date,to = end_date, src=data_src)
## [1] "MSFT" "GE"
# Daily log returns on adjusted close, merged into one xts (one column per ticker)
Returns = do.call(merge,lapply(Tickers, function(x) 
        periodReturn(Ad(get(x)),period='daily',type='log')))
Returns = na.omit(Returns[-1,]) # drop the first (partial) observation and NA rows
colnames(Returns)<-Tickers
library(lubridate)
# Dummy = 1 in calendar Q1 (Jan-Mar), 0 otherwise
Returns$Q1<-ifelse(quarter(index(Returns))==1,1,0)
# GE*Q1 expands to GE + Q1 + GE:Q1, so both the intercept and the GE slope
# are allowed to shift during Q1
reg1=lm(MSFT~GE*Q1,data=Returns)
print(summary(reg1))
## 
## Call:
## lm(formula = MSFT ~ GE * Q1, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.088806 -0.007493  0.000016  0.008366  0.095418 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.460e-03  5.973e-04   2.445   0.0147 *  
## GE           9.620e-02  2.342e-02   4.108  4.3e-05 ***
## Q1          -2.437e-05  1.147e-03  -0.021   0.9831    
## GE:Q1        3.423e-01  3.848e-02   8.894  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01647 on 1040 degrees of freedom
## Multiple R-squared:  0.1766, Adjusted R-squared:  0.1742 
## F-statistic: 74.36 on 3 and 1040 DF,  p-value: < 2.2e-16

Regression Plots

# --- Scatter plot with fitted regression line ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
plot(DAX~CAC,data=Returns) # scatter of the raw data
abline(reg1)               # overlay the fitted line

Regression Plots(v2)

# --- The six standard lm diagnostic plots (see ?plot.lm) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
#par(mfrow=c(3,2)) # uncomment to place all six diagnostics on one device
plot(reg1,which=c(1)) # residuals vs fitted values

plot(reg1,which=c(2)) # normal Q-Q plot of standardized residuals

plot(reg1,which=c(3)) # scale-location plot

plot(reg1,which=c(4)) # Cook's distance

plot(reg1,which=c(5)) # residuals vs leverage

plot(reg1,which=c(6)) # Cook's distance vs leverage

Extract Regress. Coef.

# --- Extracting coefficients, confidence intervals, and covariance ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
print('Coefficients')
## [1] "Coefficients"
coef(reg1)
##  (Intercept)          CAC 
## 0.0003522993 0.6858247625
alpha=coef(reg1)[1] # intercept estimate
beta=coef(reg1)[2]  # slope estimate
print('Confidence intervals')
## [1] "Confidence intervals"
confint(reg1) # 95% intervals by default
##                    2.5 %      97.5 %
## (Intercept) 3.396064e-05 0.000670638
## CAC         6.569808e-01 0.714668761
print('Covariance')
## [1] "Covariance"
vcov(reg1) # variance-covariance matrix of the coefficient estimates
##               (Intercept)           CAC
## (Intercept)  2.634610e-08 -9.453302e-08
## CAC         -9.453302e-08  2.162960e-04

Extract Regression Residuals

# --- Extracting residuals (accessor vs manual computation) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
Errors<-resid(reg1) # residuals via the standard accessor
plot(Errors)

#or compute them by hand: actual minus fitted
Errors2<-Returns$DAX-reg1$fitted.values
head(cbind(Errors,Errors2)) # the two columns are identical
##          Errors       Errors2
## 2 -0.0009971609 -0.0009971609
## 3  0.0080783190  0.0080783190
## 4  0.0126150011  0.0126150011
## 5 -0.0081269243 -0.0081269243
## 6 -0.0015174787 -0.0015174787
## 7  0.0040407495  0.0040407495

Regression Fitted Values

# --- Extracting fitted values (accessor vs manual computation) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
Estimates<-fitted(reg1) # fitted values via the standard accessor
#or rebuild them from the coefficients: alpha + beta * x
Estimates2<-coef(reg1)[1]+coef(reg1)[2]*Returns$CAC
head(cbind(Estimates,Estimates2)) # the two columns are identical
##      Estimates   Estimates2
## 2 -0.008329389 -0.008329389
## 3 -0.012500494 -0.012500494
## 4 -0.003611207 -0.003611207
## 5  0.006348707  0.006348707
## 6 -0.003159233 -0.003159233
## 7  0.008386293  0.008386293

Regression Predictions

# --- Out-of-sample predictions on a grid of hypothetical CAC returns ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
newCAC<-data.frame(CAC=seq(-.05,.05,.01)) # grid of hypothetical CAC returns
# Predicted DAX returns on the grid, with standard errors of the fitted mean
PredictedModel<-predict(lm(DAX~CAC,Returns),newCAC,se.fit=TRUE)
cbind(newCAC,PredictedModel$fit,PredictedModel$se.fit)
##      CAC PredictedModel$fit PredictedModel$se.fit
## 1  -0.05      -0.0339389388          0.0007593019
## 2  -0.04      -0.0270806912          0.0006164270
## 3  -0.03      -0.0202224436          0.0004761139
## 4  -0.02      -0.0133641959          0.0003415345
## 5  -0.01      -0.0065059483          0.0002233078
## 6   0.00       0.0003522993          0.0001623148
## 7   0.01       0.0072105469          0.0002146743
## 8   0.02       0.0140687946          0.0003302774
## 9   0.03       0.0209270422          0.0004640479
## 10  0.04       0.0277852898          0.0006040340
## 11  0.05       0.0346435374          0.0007467481

Test Parameter Significance

# --- Manual two-sided t-test of a coefficient against a hypothesized value ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044676 -0.004302 -0.000076  0.004093  0.040100 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003523  0.0001623    2.17   0.0301 *  
## CAC         0.6858248  0.0147070   46.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared:  0.5394, Adjusted R-squared:  0.5391 
## F-statistic:  2175 on 1 and 1857 DF,  p-value: < 2.2e-16
h0=0 # set to your hypothesized value
# t-stat for the intercept (row 1 of the coefficient table): (estimate-h0)/se
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
# df.residual() is the explicit accessor; the original reg1$df only worked
# via $ partial matching against the lm component df.residual
pvalue = pt(q=abs(tstat),df = df.residual(reg1),lower.tail = FALSE)*2
pvalue
## [1] 0.03009767

Test Param. Sig. One-sided

# --- One-sided t-tests of a coefficient against a hypothesized value ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.044676 -0.004302 -0.000076  0.004093  0.040100 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003523  0.0001623    2.17   0.0301 *  
## CAC         0.6858248  0.0147070   46.63   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006993 on 1857 degrees of freedom
## Multiple R-squared:  0.5394, Adjusted R-squared:  0.5391 
## F-statistic:  2175 on 1 and 1857 DF,  p-value: < 2.2e-16
# NOTE(review): the printed labels say "slope" but coefficients[1,] below is
# the INTERCEPT row of the coefficient table; use [2,] to test the CAC slope
# -- confirm intended.
print('H0: slope > 0')
## [1] "H0: slope > 0"
h0=0 # set to your hypothesized value
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
# Lower-tail p-value; df.residual() replaces the fragile partial-matched reg1$df
pvalue = pt(q=tstat,df = df.residual(reg1),lower.tail = TRUE)
pvalue
## [1] 0.9849512
print('H0: slope < 0')
## [1] "H0: slope < 0"
h0=0 # set to your hypothesized value
tstat = (summary(reg1)$coefficients[1,1]-h0)/summary(reg1)$coefficients[1,2]
pvalue = 1-pt(q=tstat,df = df.residual(reg1),lower.tail = TRUE)
pvalue
## [1] 0.01504883
# equivalently, take the upper tail directly
pvalue = pt(q=tstat,df = df.residual(reg1),lower.tail = FALSE)
pvalue
## [1] 0.01504883

Test Joint Hypothesis

# --- Joint linear hypothesis tests with car::linearHypothesis ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.046142 -0.003848 -0.000009  0.003987  0.033370 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002694  0.0001542   1.747   0.0807 .  
## CAC         0.5152838  0.0183380  28.099   <2e-16 ***
## FTSE        0.3644971  0.0254199  14.339   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared:  0.5853, Adjusted R-squared:  0.5849 
## F-statistic:  1310 on 2 and 1856 DF,  p-value: < 2.2e-16
library(car)
# Test if the coefficients are equal (H0: beta_CAC = beta_FTSE)
linearHypothesis(reg1,c("CAC=FTSE"))
## Linear hypothesis test
## 
## Hypothesis:
## CAC - FTSE = 0
## 
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
## 
##   Res.Df      RSS Df  Sum of Sq      F    Pr(>F)    
## 1   1857 0.082383                                   
## 2   1856 0.081752  1 0.00063101 14.326 0.0001586 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Test if the coefficients take specific values jointly
linearHypothesis(reg1,c("CAC=0","FTSE=.36"))
## Linear hypothesis test
## 
## Hypothesis:
## CAC = 0
## FTSE = 0.36
## 
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
## 
##   Res.Df      RSS Df Sum of Sq   F    Pr(>F)    
## 1   1858 0.142273                               
## 2   1856 0.081752  2  0.060521 687 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test Joint Hyp. Wald

# --- Joint test via nested-model F-test (anova) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Restricted model
reg2<-lm(DAX~.,Returns) #Unrestricted: DAX on all remaining columns
# F-test that the restrictions (dropping SMI and FTSE) hold jointly
anova(reg1,reg2)
## Analysis of Variance Table
## 
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
##   Res.Df      RSS Df Sum of Sq      F    Pr(>F)    
## 1   1857 0.090808                                  
## 2   1855 0.067909  2    0.0229 312.76 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Variance Infl. Factor

# --- Variance inflation factors (multicollinearity diagnostic) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~.,Returns) #Run Regression for all assets ("." gives all assets not DAX)
library(car)
# VIF: how much each coefficient's variance is inflated by collinearity
# with the other regressors
vif(reg1)
##      SMI      CAC     FTSE 
## 1.781686 2.023629 1.908167
#VIF>5 indicates multicollinearity

BP Test - Heteroscedasticity

# --- Breusch-Pagan test for heteroscedasticity ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~.,Returns) #Run Regression for all assets ("." gives all assets not DAX)
library(lmtest)
#H0:Homoscedasticity (i.e. var(residuals)=constant)
bptest(reg1)
## 
##  studentized Breusch-Pagan test
## 
## data:  reg1
## BP = 7.3734, df = 3, p-value = 0.0609

White StdErr. Hetero.

# --- Hypothesis test with White heteroscedasticity-robust std. errors ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.046142 -0.003848 -0.000009  0.003987  0.033370 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002694  0.0001542   1.747   0.0807 .  
## CAC         0.5152838  0.0183380  28.099   <2e-16 ***
## FTSE        0.3644971  0.0254199  14.339   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared:  0.5853, Adjusted R-squared:  0.5849 
## F-statistic:  1310 on 2 and 1856 DF,  p-value: < 2.2e-16
library(car)
# Examine CAC under the classical (homoscedastic) covariance
linearHypothesis(reg1,c("CAC=0"))
## Linear hypothesis test
## 
## Hypothesis:
## CAC = 0
## 
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
## 
##   Res.Df      RSS Df Sum of Sq      F    Pr(>F)    
## 1   1857 0.116530                                  
## 2   1856 0.081752  1  0.034778 789.56 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Same test with White heteroscedasticity-robust standard errors
# (white.adjust=TRUE substitutes a heteroscedasticity-consistent covariance)
linearHypothesis(reg1,c("CAC=0"),white.adjust = TRUE)
## Linear hypothesis test
## 
## Hypothesis:
## CAC = 0
## 
## Model 1: restricted model
## Model 2: DAX ~ CAC + FTSE
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F    Pr(>F)    
## 1   1857                        
## 2   1856  1 274.82 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Heteroscedasticity-Robust T-Test

# --- Coefficient t-tests with heteroscedasticity-robust (HC0) errors ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
print(summary(reg1))
## 
## Call:
## lm(formula = DAX ~ CAC + FTSE, data = Returns)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.046142 -0.003848 -0.000009  0.003987  0.033370 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0002694  0.0001542   1.747   0.0807 .  
## CAC         0.5152838  0.0183380  28.099   <2e-16 ***
## FTSE        0.3644971  0.0254199  14.339   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006637 on 1856 degrees of freedom
## Multiple R-squared:  0.5853, Adjusted R-squared:  0.5849 
## F-statistic:  1310 on 2 and 1856 DF,  p-value: < 2.2e-16
library("lmtest")
library("sandwich")
# Re-test coefficients using White's HC0 covariance (no small-sample correction)
coeftest(reg1, vcov = vcovHC(reg1, type = "HC0"))
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.00026938 0.00015441  1.7446  0.08122 .  
## CAC         0.51528381 0.03061855 16.8291  < 2e-16 ***
## FTSE        0.36449714 0.03611028 10.0940  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Heteroscedasticity-Robust Wald Test

# --- Joint test with heteroscedasticity-robust covariance (Wald) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC,Returns) #Restricted model
reg2<-lm(DAX~.,Returns) #Unrestricted model (all columns)
# Classical F-test (assumes homoscedastic errors)
anova(reg1,reg2)
## Analysis of Variance Table
## 
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
##   Res.Df      RSS Df Sum of Sq      F    Pr(>F)    
## 1   1857 0.090808                                  
## 2   1855 0.067909  2    0.0229 312.76 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(lmtest)
library(sandwich)
# Same joint test, but with White's HC0 robust covariance of the
# unrestricted model
waldtest(reg1, reg2, vcov = vcovHC(reg2, type = "HC0"))
## Wald test
## 
## Model 1: DAX ~ CAC
## Model 2: DAX ~ SMI + CAC + FTSE
##   Res.Df Df     F    Pr(>F)    
## 1   1857                       
## 2   1855  2 170.3 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Test for serial correlation

# --- Durbin-Watson test for first-order serial correlation ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
library(lmtest)
# H0: no first-order autocorrelation (DW statistic near 2 = none)
dwtest(reg1) #Durbin Watson
## 
##  Durbin-Watson test
## 
## data:  reg1
## DW = 1.9577, p-value = 0.1799
## alternative hypothesis: true autocorrelation is greater than 0

Test for serial correlation 2

# --- Breusch-Godfrey test for higher-order serial correlation ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
library(lmtest)
#perform Breusch-Godfrey test
# H0: no serial correlation up to indicated order
bgtest(reg1, order=3)
## 
##  Breusch-Godfrey test for serial correlation of order up to 3
## 
## data:  reg1
## LM test = 0.80293, df = 3, p-value = 0.8488

Test for serial correlation 3

# --- Ljung-Box test for serial correlation in the residuals ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package, and Box.test() is
# in base 'stats', so neither PortfolioAnalytics nor lmtest is needed here.
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE)
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
# H0: errors are independent; no serial correlation up to lag 5
Box.test(reg1$residuals, lag=5,type="Ljung")
## 
##  Box-Ljung test
## 
## data:  reg1$residuals
## X-squared = 0.9576, df = 5, p-value = 0.9659

Fixing Serial correlation

# --- Newey-West (HAC) robust standard errors ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# EuStockMarkets ships with base R's 'datasets' package
data("EuStockMarkets")
Prices<-as.data.frame(EuStockMarkets)
n_obs<-nrow(Prices)  # avoid masking base R's T (TRUE). This mattered below:
# the original passed adjust = T AFTER overwriting T with the row count.
# Daily log returns; 1:(n_obs-1) is explicit (original 1:T-1 parsed as 0:(T-1))
Returns<-log(Prices[2:n_obs,]/Prices[1:(n_obs-1),])
reg1<-lm(DAX~CAC+FTSE,Returns) #Run Regression
reg1summary<-summary(reg1)
#Here, we use Newey-West robust errors. prewhite=FALSE and adjust=TRUE give
#the textbook formula with the small-sample adjustment.
  #https://www.econometrics-with-r.org/15-4-hac-standard-errors.html
library(sandwich)
NW_VCOV <- NeweyWest(reg1, prewhite = FALSE, adjust = TRUE)

#compute standard errors: sqrt of the diagonal of the HAC covariance;
#element [2] corresponds to the CAC ("independent" variable) coefficient
hac_err_CAC=sqrt(diag(NW_VCOV))[2]
#Compare the standard Errors
cbind(reg1summary$coefficients[2,2],hac_err_CAC)
##                hac_err_CAC
## CAC 0.01833804  0.03779094
#Compute new t-stat under H0: beta_CAC = 0
newCACt=(reg1$coefficients['CAC']-0)/hac_err_CAC
#Compare the t-stats
cbind(reg1summary$coefficients[2,3],newCACt)
##               newCACt
## CAC 28.09917 13.63512

Equilibrium Pricing Models

CAPM

# --- CAPM: regress MSFT excess returns on market (S&P 500) excess returns ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
# getSymbols() assigns one xts object per ticker into the workspace
getSymbols(tickers_code[1:2],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "MSFT"  "^GSPC"
getSymbols(tickers_code[3],src = 'FRED')
## [1] "TB4WK"
### Data processing
# strip punctuation so "^GSPC" matches the GSPC object getSymbols created
tickers = gsub("[[:punct:]]", "", tickers_code)
# Prices (adjusted close, merged into one xts)
Price = do.call(merge, lapply(tickers[1:2], function(x) Ad(get(x))))
names(Price) = lapply(tickers[1:2], function(x) paste(x,".Price",sep=""))
# Returns (monthly log returns)
{Return = do.call(merge,lapply(Price, function(x) 
        periodReturn(x,period='monthly',type='log')))}
names(Return) = lapply(tickers[1:2], function(x) paste(x,".Return",sep=""))
# Risk free rate
Rf = TB4WK["2016-01-01/2020-12-31"] # this is an annual rate by default
Rf = (Rf/100+1)^(1/12)-1 # de-annualize to a monthly rate
Rf = log(1+Rf) # convert to log
names(Rf) = "Rf"
Asset = do.call(merge,list(Price,Return,Rf))### merge data together (aligned on dates)
Asset = na.omit(Asset)#clean NA's
# Excess returns over the risk-free rate
Asset$GSPC.ExcessReturn=Asset$GSPC.Return-Asset$Rf
Asset$MSFT.ExcessReturn=Asset$MSFT.Return-Asset$Rf
# CAPM regression: intercept = alpha, slope = market beta
CAPMModel=lm(MSFT.ExcessReturn~GSPC.ExcessReturn,data=Asset) #run CAPM
print(summary(CAPMModel))
## 
## Call:
## lm(formula = MSFT.ExcessReturn ~ GSPC.ExcessReturn, data = Asset)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.120639 -0.023982  0.004723  0.021406  0.076649 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       0.016359   0.005011   3.265  0.00184 ** 
## GSPC.ExcessReturn 0.802098   0.111760   7.177 1.48e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03798 on 58 degrees of freedom
## Multiple R-squared:  0.4704, Adjusted R-squared:  0.4612 
## F-statistic: 51.51 on 1 and 58 DF,  p-value: 1.478e-09

CAPM Many Assets

# --- CAPM estimated per-ticker via split-apply-combine (plyr) ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(quantmod)

startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
# NOTE(review): "FB" was renamed "META" in 2022, so this ticker may no longer
# resolve on Yahoo -- confirm before re-running.
tickers_code <- c("IBM","AAPL","GOOG","FB","MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:6],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM"   "AAPL"  "GOOG"  "FB"    "MSFT"  "^GSPC"
getSymbols(tickers_code[7],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
# NOTE(review): the Rf window ends 2020-03-31 while prices run to 2020-12-31;
# the date-aligned subtraction below drops the later months -- confirm intended.
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return (market log return minus risk-free rate)
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
                                period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return: one tibble row per ticker, with list-columns holding
# that stock's excess-return series, the (shared) market series, and the dates
df <- tibble(Ticker = tickers_code[1:5],
             ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x) 
               data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
             ExcessReturn.Market = rep(ExcessReturn.Market,5),
             #Date = index(Rf)
             Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
             )

library(plyr)
# convert to long table (one row per ticker-month)
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))

# Apply coef to each model and return a data frame (alpha + beta per ticker)
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
print(coefs)
##   Ticker    Intercept      Beta
## 1   AAPL  0.013577109 1.1372826
## 2     FB  0.004428500 1.0303281
## 3   GOOG  0.003823518 0.9839200
## 4    IBM -0.006817774 1.4072021
## 5   MSFT  0.018216833 0.8067236

CAPM Rolling

# --- Rolling CAPM: 2-year sliding-window alpha/beta estimates ---
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("IBM","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:2],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM"   "^GSPC"
getSymbols(tickers_code[3],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
# NOTE(review): the Rf window ends 2020-03-31 while prices run to 2020-12-31;
# the date-aligned subtraction below drops the later months -- confirm intended.
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return (market log return minus risk-free rate)
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
                                period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return: a one-row tibble of list-columns for the single ticker
df <- tibble(Ticker = tickers_code[1],
             ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x) 
               data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
             ExcessReturn.Market = rep(ExcessReturn.Market,1),
             #Date = index(Rf)
             Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
             )

library(plyr)
# convert to long table (one row per ticker-month)
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))

# Apply coef to each model and return a data frame
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
library(rollRegres)
# Re-estimate the CAPM on a sliding 24-month (2-year) window per ticker
rollmodels <- dlply(df_long, "Ticker", function(x) roll_regres(ExcessReturn.Stock~1+ExcessReturn.Market,
                          x,width = 24L,
                          do_compute = c("sigmas", "r.squareds")))
# rolling coefficients (the first width-1 windows are NA; dropped below)
rollcoefs=ldply(rollmodels, function(x) x$coefs)
rollcoefs$Date = rep(index(Rf),1)
rollcoefs = na.omit(rollcoefs)
rollcoefs=rollcoefs[order(rollcoefs$Date,rollcoefs$Ticker),]
row.names(rollcoefs) =NULL
names(rollcoefs) = c("Ticker","Alpha","Beta","Date")
head(rollcoefs,10)
##    Ticker        Alpha     Beta       Date
## 1     IBM -0.012153540 1.804071 2017-12-01
## 2     IBM -0.012946989 1.776356 2018-01-01
## 3     IBM -0.013550875 1.716319 2018-02-01
## 4     IBM -0.008525027 1.187984 2018-03-01
## 5     IBM -0.009532404 1.207887 2018-04-01
## 6     IBM -0.012440405 1.145113 2018-05-01
## 7     IBM -0.012534233 1.148245 2018-06-01
## 8     IBM -0.012868609 1.097941 2018-07-01
## 9     IBM -0.013262752 1.087013 2018-08-01
## 10    IBM -0.011910720 1.071676 2018-09-01
# Plot the rolling beta time series
colors = c("darkred")
linestypes = c("solid")
shapes = c(15)
ggplot(data = rollcoefs,
       aes(x=Date,y=Beta,group = Ticker,color = Ticker, lty = Ticker,shape = Ticker))+
  geom_point()+
  geom_line()+
  scale_x_date(date_breaks = "6 months" , date_labels = "%Y-%m")+
  scale_color_manual(values = alpha(colors,0.5)) +
  scale_linetype_manual(values = linestypes)+ 
  scale_shape_manual(values=shapes)+
  labs(x="Date", y="",title = "Rolling Beta (estimated window = 2 year)")

Security Market Line

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(quantmod)
startd<-"2015-12-01"
endd<-"2020-12-31"
freq<-"monthly"
tickers_code <- c("IBM","F","GE","CAT","MSFT","^GSPC","TB4WK") # GSPC=SP500; TB4WK=1mt Treasury Yield
getSymbols(tickers_code[1:6],from = startd, to =endd, periodicity = freq, src = 'yahoo')
## [1] "IBM"   "F"     "GE"    "CAT"   "MSFT"  "^GSPC"
getSymbols(tickers_code[7],src = 'FRED')
## [1] "TB4WK"
library(tidyverse)
# risk free rate
Rf = TB4WK["2016-01-01/2020-03-31"] # annual rate
Rf = (Rf/100+1)^(1/12)-1 # convert to month rate
Rf = log(1+Rf) # converting to log returns
names(Rf) = "Rf"
# market excess return
ExcessReturn.Market = data.frame(periodReturn(Ad(get("GSPC")),
                                period = "monthly",type='log')[-1,]-Rf)
# stocks' excess return
df <- tibble(Ticker = tickers_code[1:5],
             ExcessReturn.Stock = do.call(c,lapply(Ticker, function(x) 
               data.frame(periodReturn(Ad(get(x)),type='log')[-1,]-Rf))),
             ExcessReturn.Market = rep(ExcessReturn.Market,5),
             #Date = index(Rf)
             Date = do.call(c,lapply(Ticker, function(x) (list(index(Rf)))))
             )

# NOTE: plyr is loaded after dplyr (via tidyverse); plyr masks several dplyr
# verbs (e.g. summarise, mutate), which is a common source of subtle bugs.
library(plyr)
# convert to long table
df_long = df %>% unnest(colnames(df))
#Break up df_long by Ticker, then fit the specified model to each piece and return a list
models <- dlply(df_long, "Ticker", function(x) lm(ExcessReturn.Stock~1+ExcessReturn.Market, data = x))

# Apply coef to each model and return a data frame
coefs=ldply(models, coef)
names(coefs) = c("Ticker","Intercept","Beta")
### expected Rf and market return
Rf_avg = mean(Rf)
Mkt_avg = mean(ExcessReturn.Market$monthly.returns)+Rf_avg

### require return from CAPM: E[R] = Rf + beta * (E[Rm] - Rf)
coefs$RequireReturn = Rf_avg + coefs$Beta*(Mkt_avg-Rf_avg)
# realized (sample-average) return per stock, for comparison against CAPM
coefs$ExpectedReturn = ddply(df_long,"Ticker",summarise,
                  Mean = mean(ExcessReturn.Stock,na.rm = TRUE))$Mean+Rf_avg
head(coefs)
##   Ticker    Intercept      Beta RequireReturn ExpectedReturn
## 1    CAT  0.007489770 1.2497801   0.005495941   0.0129857105
## 2      F -0.022175324 1.5373042   0.006525288  -0.0156500368
## 3     GE -0.029873270 1.3534185   0.005866970  -0.0240063002
## 4    IBM -0.006817773 1.4072019   0.006059516  -0.0007582564
## 5   MSFT  0.018216833 0.8067241   0.003909783   0.0221266164
# Plot the SML: the grey line is the CAPM-implied return as a function of beta;
# squares are realized mean returns, circles are CAPM-required returns.
colors = c("darkred","steelblue","darkgreen","yellow4","darkblue")
linestypes = c("solid","longdash","twodash","dashed","dotdash")
shapes = c(15:19)
ggplot(coefs,aes(x = Beta,color = Ticker))+
  geom_abline(intercept = Rf_avg,slope = Mkt_avg-Rf_avg,color="grey",size = 2, alpha =0.6)+
  geom_point(aes(y=ExpectedReturn,color = Ticker),size = 3,shape=15)+
  geom_point(aes(y=RequireReturn,color = Ticker),size = 3)+
  geom_point(aes(x=0,y=Rf_avg),color = "darkgreen",size=5,shape = 2)+
  geom_point(aes(x=1,y=Mkt_avg),color = "darkgreen",size=5,shape = 2)+
  annotate(geom="text", x=1.08, y=Mkt_avg-0.001, label="M(Market portfolio)",
              color="darkgreen")+
  annotate(geom="text", x=0.1, y=Rf_avg, label="Risk-free Rate",
              color="darkgreen")+
  geom_segment(aes(x = 1, y = 0, xend = 1, yend = Mkt_avg),linetype="dashed")+
  geom_segment(aes(x = 0, y = Mkt_avg, xend = 1, yend = Mkt_avg),linetype="dashed")+
  scale_color_manual(values = alpha(colors,0.7)) +
  scale_fill_manual(values = alpha(colors,0.7))+
  scale_y_continuous(labels = scales::percent)+
  scale_x_continuous(expand = c(0.0001, 0, 0.1, 0.1))+
  labs(x="Beta", y="Return",title = "Security Market Line")+
  theme(panel.border = element_blank())

APT

### Housekeeping
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# prepare library
library(quantmod)
library(tidyverse)
library(readr)
library(car)
library(lmtest)
library(AER)
library(lubridate)
library(xts)
### Download Dow Constituents price data from Yahoo Finance
# set parameters
start_date<-"2015-12-01"
end_date<-"2020-12-31"
freq<-"daily"
# 29 Dow constituents (DOW itself excluded)
tickers_code <- c("AXP","AMGN","AAPL","BA","CAT","CSCO","CVX","GS","HD","HON","IBM","INTC","JNJ","KO","JPM","MCD","MMM","MRK","MSFT","NKE","PG","TRV","UNH","CRM","VZ","V","WBA","WMT","DIS")

# pull stock price from yahoo; each ticker becomes an xts object in the workspace
getSymbols(tickers_code,from=start_date,to=end_date,periodicity=freq,src='yahoo')
##  [1] "AXP"  "AMGN" "AAPL" "BA"   "CAT"  "CSCO" "CVX"  "GS"   "HD"   "HON" 
## [11] "IBM"  "INTC" "JNJ"  "KO"   "JPM"  "MCD"  "MMM"  "MRK"  "MSFT" "NKE" 
## [21] "PG"   "TRV"  "UNH"  "CRM"  "VZ"   "V"    "WBA"  "WMT"  "DIS"
### Load Factor Data
# download factor data from Ken French website via FTP
download.file(url = "https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip", destfile = "F-F_Research_Data_Factors_CSV.zip",mode="wb")
# read the contents and extract the desired dates
# (first unnamed column "...1" holds the YYYYMM date; na.omit drops the
# annual-factor section appended at the bottom of the French CSV)
Factors <- read_csv("F-F_Research_Data_Factors_CSV.zip", skip = 3, col_names = T) %>%
  na.omit()%>%
  dplyr::rename("Date" = "...1") %>%
  dplyr::mutate_all(as.numeric) %>%
  filter(Date > 196301)
# Format the Factor data frame: YYYYMM -> Date on the 1st of the month
FFdate<-as.Date(paste0(as.character(Factors$Date), '01'), format='%Y%m%d')
# drop the Date column and convert all factor series (Mkt-RF, SMB, HML, RF)
# from percent to log returns
FFdata<-log(select(Factors, -Date)/100+1)
FFxts<-xts(FFdata,order.by=FFdate)[paste(start_date, "/", end_date, sep = "")]
# print the first several rows
head(FFxts)
##                   Mkt-RF          SMB          HML         RF
## 2015-12-01 -0.0219389075 -0.028502360 -0.026241311 9.9995e-05
## 2016-01-01 -0.0594315838 -0.034487930  0.020390690 9.9995e-05
## 2016-02-01 -0.0007002451  0.008067371 -0.005716307 1.9998e-04
## 2016-03-01  0.0672847468  0.007571265  0.010939940 1.9998e-04
## 2016-04-01  0.0090588445  0.006677655  0.031595562 9.9995e-05
## 2016-05-01  0.0176434352 -0.001901807 -0.016739324 9.9995e-05
# Compute the excess return and join excess return with factor data
# use `Ad()` to get adjusted closed price
Price = do.call(merge, lapply(tickers_code, function(x) Ad(get(x))))
names(Price) = lapply(tickers_code, function(x) paste(x,".Price",sep=""))
# Extract the last days of each month (month-end observations from daily data)
Price = Price[endpoints(Price, on='months')]
# Alter the date to match with other series: snap each month-end date to the
# 1st of its month so it aligns with the Fama-French index
Price = xts(x = Price, order.by = floor_date(index(Price), "month")) %>%
  na.omit()

# Let's keep only the appropriate factor data (price data except the first line, since it will become NA after calculating returns)
FFxts<-FFxts[index(Price)[-1]]

# stocks' excess return: one list-column row per ticker, each holding the
# monthly log return minus the risk-free rate
df <- tibble(Ticker = tickers_code,
             ExcessReturn.Stock = do.call(c,lapply(tickers_code, function(x)
               data.frame(periodReturn(Price[,paste(x,".Price",sep = "")], type='log')[-1,]-FFxts$RF))),
             Date = do.call(c,lapply(Ticker, function(x) (list(index(FFxts)))))
             )

# Tibble for Factor: factor returns aligned to the stock-return dates.
# RF is dropped because excess returns already subtract the risk-free rate.
# Note: as.tibble() is deprecated (tibble >= 2.0.0); use as_tibble() instead.
FF_df <- tibble(Date = index(Price)[-1],
                as_tibble(FFxts)) %>%
  select(-RF)

# convert to long table and join with the factors
# (inner_join matches on the shared Date column by default)
df_long = df %>%
  unnest(colnames(df)) %>%
  inner_join(FF_df)
head(df_long)
## # A tibble: 6 × 6
##   Ticker ExcessReturn.Stock Date        `Mkt-RF`      SMB      HML
##   <chr>               <dbl> <date>         <dbl>    <dbl>    <dbl>
## 1 AXP              -0.258   2016-01-01 -0.0594   -0.0345   0.0204 
## 2 AXP               0.0379  2016-02-01 -0.000700  0.00807 -0.00572
## 3 AXP               0.0994  2016-03-01  0.0673    0.00757  0.0109 
## 4 AXP               0.0683  2016-04-01  0.00906   0.00668  0.0316 
## 5 AXP               0.00493 2016-05-01  0.0176   -0.00190 -0.0167 
## 6 AXP              -0.0743  2016-06-01 -0.000500  0.00588 -0.0146
### First-pass Regression
# Apply FF3 regression using market excess return, SMB and HML as independent variables

# Create the IDs for stocks since we cannot use Tickers directly in group_by
Stock_IDs <- tibble(Ticker = unique(df_long$Ticker),
                 IDs = 1:length(unique(df_long$Ticker)))

# apply the full sample regression to calculate the full sample betas for all stocks
# (do() runs one time-series regression per stock and stores the coefficient
# vector — intercept plus three factor loadings — in a list-column)
FF3_step1 <- df_long %>%
  # unite the Stock_IDs
  inner_join(Stock_IDs) %>%
  select(-c(Date, Ticker)) %>%
  group_by(IDs) %>%
  do(model = lm(ExcessReturn.Stock ~ `Mkt-RF` + SMB + HML, data = .) %>%
       coefficients())

# Unwrap the results: one row of coefficients per stock
FF3_step1_result <- do.call(rbind.data.frame, FF3_step1$model) %>%
  as_tibble()
# Rename the variables (reuse the factor names; the intercept becomes Alpha)
names(FF3_step1_result) = names(Factors)[-length(Factors)]
names(FF3_step1_result)[1] = "Alpha"

# Add in the Stock_IDs, then add in Ticker
FF3_step1_result <- Stock_IDs %>%
  dplyr::inner_join(cbind(FF3_step1[, 1], FF3_step1_result)) %>%
  select(-IDs)

head(FF3_step1_result)
## # A tibble: 6 × 5
##   Ticker    Alpha `Mkt-RF`      SMB    HML
##   <chr>     <dbl>    <dbl>    <dbl>  <dbl>
## 1 AXP    -0.00271    1.27   0.00820  0.331
## 2 AMGN   -0.00689    0.794 -0.0243  -0.573
## 3 AAPL    0.00565    1.42  -0.476   -0.706
## 4 BA     -0.00233    1.50   0.130    0.953
## 5 CAT     0.00865    0.824  0.436    0.115
## 6 CSCO   -0.00167    0.922 -0.0523  -0.115
### Second-pass Regression
# Re-run the FF3 using betas as independent variables
# (cross-sectional regression of mean excess returns on the first-pass betas)
# compute mean returns
Second_reg_data <- df_long %>%
  # group by ticker then do calculation
  dplyr::group_by(Ticker) %>%
  # use summarise to calculate the mean excess return
  dplyr::summarise(MeanEXRet = mean(ExcessReturn.Stock)) %>%
  # join with beta estimates
  dplyr::inner_join(FF3_step1_result) %>%
  dplyr::select(-Alpha)

# run FF3 regression using betas as independent variables
# (coefficients estimate the factor risk premia)
FF3_step2 <- lm(MeanEXRet ~ `Mkt-RF` + SMB + HML, data = Second_reg_data)
summary(FF3_step2)
## 
## Call:
## lm(formula = MeanEXRet ~ `Mkt-RF` + SMB + HML, data = Second_reg_data)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0166380 -0.0027617 -0.0003736  0.0035955  0.0109143 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.002267   0.004161   0.545   0.5907  
## `Mkt-RF`     0.008546   0.004161   2.054   0.0506 .
## SMB         -0.003547   0.003924  -0.904   0.3746  
## HML         -0.007220   0.002881  -2.507   0.0191 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.006452 on 25 degrees of freedom
## Multiple R-squared:  0.3173, Adjusted R-squared:  0.2354 
## F-statistic: 3.874 on 3 and 25 DF,  p-value: 0.02104

Time Series Models

ACF

# Autocorrelation function of real GDP levels.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01" #We want from Jan1990 forward. 
endd<-"2018-01-01"
# NOTE(review): freq is defined but never passed to Quandl() (no collapse
# argument); FRED/GDPC1 is published quarterly, so the result is unaffected.
freq<-"quarterly"
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
acf(GDP)

PACF

# Partial autocorrelation function of real GDP levels.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01" #We want from Jan1990 forward. 
endd<-"2018-01-01"
# NOTE(review): freq is defined but unused; FRED/GDPC1 is already quarterly.
freq<-"quarterly"
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
pacf(GDP)

KPSS Test Stationarity

# KPSS stationarity test on real GDP levels.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)


library(tseries)
# KPSS test for trend stationary (null = stationary around a deterministic trend)
kpss.test(GDP, null="Trend")
## 
##  KPSS Test for Trend Stationarity
## 
## data:  GDP
## KPSS Trend = 0.37964, Truncation lag parameter = 4, p-value = 0.01
# H0: trend stationarity 
# Interpret: Low p-value -->reject the null hypothesis of trend stationary

ADF Test Unit Root

# Augmented Dickey-Fuller unit-root test on real GDP levels.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)

# ADF Test for unit root, which is a common type of non-stationary
library(tseries)
adf.test(GDP, alternative = "stationary") 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  GDP
## Dickey-Fuller = -1.9904, Lag order = 4, p-value = 0.5806
## alternative hypothesis: stationary
# H0: Not stationary (the series has a unit root)
# Interpret: High p-value -->fail to reject the null hypothesis of non-stationary

Fix NonStationary w/Diff.

# Fixing non-stationarity by differencing: GDP levels -> log growth rates.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP = Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
colnames(GDP)[1]="Level" # Add column name 

# Use ADF test to check if it is non-stationary 
library(tseries)
adf.test(GDP$Level)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  GDP$Level
## Dickey-Fuller = -1.9904, Lag order = 4, p-value = 0.5806
## alternative hypothesis: stationary
# Interpret: High p-value -->fail to reject the null hypothesis of non-stationary. This series is non-stationary

GDP$LogLevel<- log(GDP$Level) 
GDP$FirstDifLog<- diff(GDP$LogLevel, differences = 1) # quarterly log growth rate
GDP<-na.omit(GDP) # get rid of the NAs (first difference is NA)

# Now use the ADF test again to check if it is still non-stationary 
adf.test(GDP$FirstDifLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  GDP$FirstDifLog
## Dickey-Fuller = -4.2675, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Interpret: Low p-value --> reject the null hypothesis of non-stationarity
# Our data appears to now be stationary due to differencing

Cointegration

# Engle-Granger style cointegration check for SPY and IVV (both track the S&P 500).
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(quantmod)

startd <-"2013-01-01"  
endd <-"2013-12-31"
SPY <- Ad(getSymbols("SPY", from = startd, to = endd, auto.assign = FALSE)) # get the adjusted closed 
IVV <- Ad(getSymbols("IVV", from = startd, to = endd, auto.assign = FALSE)) # get the adjusted closed 

 
### Step 1: Finding the integration order of each series
library(tseries)
kpss.test(SPY, null="Trend") # Low p-value --> reject H0 -->Non stationary. 
## 
##  KPSS Test for Trend Stationarity
## 
## data:  SPY
## KPSS Trend = 0.20051, Truncation lag parameter = 5, p-value = 0.01581
kpss.test(IVV, null="Trend") # Low p-value --> reject H0 -->Non stationary. 
## 
##  KPSS Test for Trend Stationarity
## 
## data:  IVV
## KPSS Trend = 0.19944, Truncation lag parameter = 5, p-value = 0.01621
# Take first difference
SPY_d1<-diff(SPY,differences = 1)
IVV_d1<-diff(IVV,differences = 1)
# ADF test for unit root
adf.test(na.omit(SPY_d1)) # low p-value --> reject H0 -->SPY_d1 is stationary --> SPY is I(1).
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(SPY_d1)
## Dickey-Fuller = -5.742, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
adf.test(na.omit(IVV_d1)) # low p-value --> reject H0 -->IVV_d1 is stationary --> IVV is I(1).
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(IVV_d1)
## Dickey-Fuller = -5.711, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Interpret: Series Y_t(IVV) and X_t(SPY) have the same integration order (1)


### Step 2: Estimate cointegration coefficient and get residuals
fit<-lm(IVV~SPY)
res<-fit$residuals

### Step 3: Do ADF test for unit root on the cointegrating residuals
adf.test(res) # Low p-value --> reject H0--> Residuals are stationary. 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  res
## Dickey-Fuller = -6.0557, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
#Interpret: Y_t(IVV) and X_t(SPY) are co-integrated.

ARIMA

# Fit AR(1), MA(1) and ARMA(1,1) models to quarterly GDP log growth.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs


# Specify the ARIMA model with your choice of p, d, q values (p=AR order, d=differencing, q=MA order)
AR1 <- arima(GDPGrowth, order = c(1, 0, 0)) # fit an AR(1) model 
AR1 # print the model 
## 
## Call:
## arima(x = GDPGrowth, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.4050      6e-03
## s.e.  0.0858      9e-04
## 
## sigma^2 estimated as 2.917e-05:  log likelihood = 425.77,  aic = -845.54
MA1 <- arima(GDPGrowth, order = c(0, 0, 1)) # fit an MA(1) model 
MA1 # print the model 
## 
## Call:
## arima(x = GDPGrowth, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.2868      6e-03
## s.e.  0.0733      7e-04
## 
## sigma^2 estimated as 3.097e-05:  log likelihood = 422.46,  aic = -838.91
AR1MA1 <- arima(GDPGrowth, order = c(1, 0, 1)) # fit an ARMA(1,1) model 
AR1MA1 # print the model 
## 
## Call:
## arima(x = GDPGrowth, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1      ma1  intercept
##       0.7001  -0.3553     0.0060
## s.e.  0.1368   0.1750     0.0011
## 
## sigma^2 estimated as 2.828e-05:  log likelihood = 427.46,  aic = -846.92

ARIMA-Auto Lag Select

We can use the auto.arima() function from the forecast package to ask R to return the best ARIMA model according to either AIC, AICc or BIC value.

# Automatic ARIMA order selection on GDP growth via forecast::auto.arima().
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs

library(forecast)
AR <- auto.arima(GDPGrowth, max.q = 0) # set the maximum value of MA order q=0 to auto fit an AR model
AR # print the model 
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65
MA <- auto.arima(GDPGrowth, max.p = 0) # set the maximum value of AR order p=0 to auto fit an MA model
MA # print the model 
## Series: GDPGrowth 
## ARIMA(0,0,3) with non-zero mean 
## 
## Coefficients:
##          ma1     ma2     ma3    mean
##       0.3227  0.2925  0.1340  0.0061
## s.e.  0.0921  0.0957  0.0816  0.0009
## 
## sigma^2 = 2.937e-05:  log likelihood = 427.39
## AIC=-844.78   AICc=-844.21   BIC=-831.18
ARMA <- auto.arima(GDPGrowth) # fit an ARMA model(the fitted model might be AR, MA or ARIMA)
ARMA # print the model 
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65

SARIMA

A seasonal autoregressive integrated moving average (SARIMA) model is one step different from an ARIMA model based on the concept of seasonal trends.

Fit SARIMA Giving Model Orders

# SARIMA order selection for monthly housing starts (strongly seasonal series).
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2000-01-01"
endd<-"2018-01-01"
ticker <- "FRED/HOUSTNSA" #New Privately-Owned Housing Units Started: Total Units
HSNG <- Quandl(ticker, type="ts",start_date=startd, end_date=endd)
# Braces group the plot and abline calls so they render as one figure
{plot(HSNG) 
abline(v = ts(c(2000,2005,2010,2015)),col = "red") # v is for vertical lines
abline(v = ts(c(2001,2002,2003,2004)), col = "blue", lty = 2)}

# Evidence of seasonality

### Decide the order of SARIMA
library(astsa)
acf2(HSNG, 48) # Interpret: The slow decay shown in the ACF is a sign that differencing may be required

##      [,1]  [,2]  [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## ACF  0.96  0.92  0.87 0.82 0.79 0.77 0.77 0.78 0.81  0.85  0.88  0.88  0.85
## PACF 0.96 -0.06 -0.12 0.06 0.15 0.15 0.13 0.21 0.26  0.22  0.08 -0.12 -0.32
##      [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
## ACF    0.8  0.74  0.69  0.65  0.62  0.61  0.61  0.63  0.65  0.67  0.67  0.64
## PACF  -0.4 -0.22 -0.05  0.14  0.00  0.05 -0.21  0.01 -0.02 -0.05  0.00 -0.03
##      [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
## ACF   0.58  0.52  0.46  0.42  0.39  0.37  0.36  0.37  0.38  0.39  0.39  0.35
## PACF -0.02 -0.08 -0.07  0.08 -0.05 -0.06 -0.05  0.03 -0.10  0.00  0.07 -0.04
##      [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF   0.30  0.24  0.19  0.15  0.12  0.10  0.10  0.10  0.12  0.13  0.12
## PACF  0.01  0.07  0.05  0.03  0.02  0.03 -0.04  0.02  0.09  0.01  0.01
acf2(diff(HSNG), 48) # Interpret: Even with the first order of differencing, we observe that there is still slow decay in the ACF plots at a seasonal lag period of 12. This thus suggest a seasonal difference to be applied.

##      [,1] [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  0.03 0.11 -0.07 -0.17 -0.18 -0.17 -0.21 -0.19 -0.09  0.08  0.28  0.41
## PACF 0.03 0.11 -0.08 -0.18 -0.17 -0.14 -0.21 -0.27 -0.22 -0.08  0.12  0.32
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF   0.35  0.06 -0.07 -0.22 -0.10 -0.22 -0.07 -0.31 -0.06  0.10  0.24  0.36
## PACF  0.40  0.20  0.03 -0.18 -0.03 -0.06  0.20 -0.02  0.01  0.04 -0.01  0.00
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.28  0.12 -0.08 -0.22 -0.10 -0.14 -0.17 -0.20 -0.06  0.03   0.2  0.40
## PACF -0.01  0.05  0.04 -0.10  0.03  0.04  0.03 -0.07  0.08 -0.04  -0.1  0.01
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF   0.19  0.09 -0.08 -0.15 -0.15 -0.13 -0.12 -0.18 -0.12  0.09  0.17  0.35
## PACF -0.03 -0.09 -0.07 -0.05 -0.04 -0.04  0.02 -0.03 -0.09 -0.01 -0.02  0.04
acf2(diff(diff(HSNG),12),48)

##       [,1]  [,2]  [,3] [,4]  [,5] [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12]
## ACF  -0.53  0.06  0.02 0.08 -0.06 0.06 -0.12  0.09 -0.04 -0.02  0.24 -0.48
## PACF -0.53 -0.32 -0.17 0.05  0.07 0.13 -0.05 -0.05 -0.07 -0.10  0.33 -0.28
##      [,13] [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## ACF   0.34 -0.09 -0.01 -0.06  0.09 -0.11  0.19 -0.18  0.04  0.07  0.00 -0.06
## PACF -0.05 -0.08 -0.07 -0.01  0.02  0.00  0.10  0.02 -0.11  0.00  0.25 -0.18
##      [,25] [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36]
## ACF   0.02  0.06  0.01 -0.05  0.02  0.07 -0.14  0.09  0.05 -0.11  0.00  0.08
## PACF  0.02  0.08  0.08  0.01  0.05  0.04 -0.03 -0.07  0.05  0.04  0.12 -0.14
##      [,37] [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48]
## ACF  -0.08  0.02 -0.06  0.04 -0.04 -0.02  0.03  0.00 -0.05  0.06 -0.06  0.03
## PACF -0.05  0.01 -0.08 -0.10 -0.06 -0.05 -0.05 -0.08  0.00 -0.01 -0.02 -0.11
# Interpret: 
# Seasonal Order: From the seasonal lag perspective, we can see that the ACF cuts off at the 2nd seasonal lag, while the PACF appears to tail off. This would suggest a SARMA model of (0,2). 
# ARMA Order: Within the first seasonal cycle, it can be seen that the ACF appears to be cutting off at lag = 1, while PACF appears to be cutting off at lag = 3. 
# Thus a proposed model can be ARMA (3,1) x Seasonal (0,2)(lag = 12) for the differenced time series. 


### Fit SARIMA Model
# Non-seasonal ARIMA(3,1,1) with seasonal (0,1,2) at period 12, per the ACF/PACF analysis
arima(HSNG, order = c(3,1,1), seasonal = list(order = c(0,1,2), period = 12))
## 
## Call:
## arima(x = HSNG, order = c(3, 1, 1), seasonal = list(order = c(0, 1, 2), period = 12))
## 
## Coefficients:
##          ar1     ar2     ar3      ma1     sma1     sma2
##       0.3734  0.3349  0.2163  -0.9246  -0.6783  -0.0807
## s.e.  0.0870  0.0733  0.0704   0.0493   0.0784   0.0782
## 
## sigma^2 estimated as 74.76:  log likelihood = -734.27,  aic = 1482.55
# NOTE(review): sarima() and sarima.for() below are provided by the astsa
# package (loaded earlier in this chunk), not by forecast.
library(forecast)
sarima(HSNG, p=3, d=1, q=1, P=0, D=1, Q=2, S=12) # sarima() takes in arguments in the following order: data, ARIMA inputs (p,d,q), SARIMA inputs (P,D,Q), and seasonal lag S 
## initial  value 2.564457 
## iter   2 value 2.298184
## iter   3 value 2.195428
## iter   4 value 2.189850
## iter   5 value 2.183666
## iter   6 value 2.183389
## iter   7 value 2.183263
## iter   8 value 2.183249
## iter   9 value 2.183238
## iter  10 value 2.183220
## iter  11 value 2.183147
## iter  12 value 2.182860
## iter  13 value 2.182738
## iter  14 value 2.182723
## iter  15 value 2.181411
## iter  16 value 2.180273
## iter  17 value 2.180018
## iter  18 value 2.179797
## iter  19 value 2.179795
## iter  20 value 2.179794
## iter  21 value 2.179794
## iter  22 value 2.179794
## iter  23 value 2.179793
## iter  24 value 2.179793
## iter  24 value 2.179793
## iter  24 value 2.179793
## final  value 2.179793 
## converged
## initial  value 2.184982 
## iter   2 value 2.184962
## iter   3 value 2.184622
## iter   4 value 2.184601
## iter   5 value 2.184203
## iter   6 value 2.183855
## iter   7 value 2.183550
## iter   8 value 2.181614
## iter   9 value 2.180869
## iter  10 value 2.180682
## iter  11 value 2.180520
## iter  12 value 2.180464
## iter  13 value 2.180444
## iter  14 value 2.180441
## iter  15 value 2.180441
## iter  16 value 2.180440
## iter  17 value 2.180440
## iter  17 value 2.180440
## iter  17 value 2.180440
## final  value 2.180440 
## converged

## $fit
## 
## Call:
## arima(x = xdata, order = c(p, d, q), seasonal = list(order = c(P, D, Q), period = S), 
##     include.mean = !no.constant, transform.pars = trans, fixed = fixed, optim.control = list(trace = trc, 
##         REPORT = 1, reltol = tol))
## 
## Coefficients:
##          ar1     ar2     ar3      ma1     sma1     sma2
##       0.3734  0.3349  0.2163  -0.9246  -0.6783  -0.0807
## s.e.  0.0870  0.0733  0.0704   0.0493   0.0784   0.0782
## 
## sigma^2 estimated as 74.76:  log likelihood = -734.27,  aic = 1482.55
## 
## $degrees_of_freedom
## [1] 198
## 
## $ttable
##      Estimate     SE  t.value p.value
## ar1    0.3734 0.0870   4.2919  0.0000
## ar2    0.3349 0.0733   4.5670  0.0000
## ar3    0.2163 0.0704   3.0738  0.0024
## ma1   -0.9246 0.0493 -18.7387  0.0000
## sma1  -0.6783 0.0784  -8.6496  0.0000
## sma2  -0.0807 0.0782  -1.0320  0.3034
## 
## $AIC
## [1] 7.267385
## 
## $AICc
## [1] 7.269475
## 
## $BIC
## [1] 7.381242
### Forecast
sarima.for(HSNG, n.ahead = 20, p=3, d=1, q=1, P=0, D=1, Q=2, S=12) # forecast prediction for next 20 time points

## $pred
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2018            89.94963 102.27485 114.10191 114.38431 119.99422 118.98789
## 2019  93.38826  95.01798 107.50834 118.91902 119.63144 124.58177 123.90903
##            Aug       Sep       Oct       Nov       Dec
## 2018 110.90079 112.45802 113.53182 101.75552  91.62767
## 2019 115.99305 117.36446                              
## 
## $se
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2018            8.646530  9.477341 10.713797 12.129696 13.232276 14.373052
## 2019 20.580318 22.526882 23.813414 25.176268 26.565959 27.872729 29.174661
##            Aug       Sep       Oct       Nov       Dec
## 2018 15.479904 16.538307 17.580943 18.599896 19.598315
## 2019 30.455190 31.710785

Fit SARIMA Without Giving Model Orders

We can use the auto.arima() function from the forecast package and set seasonal = TRUE to ask R to return the best ARIMA/SARIMA model according to either AIC, AICc or BIC value.

# Automatic SARIMA selection for housing starts via forecast::auto.arima().
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2000-01-01"
endd<-"2018-01-01"
ticker <- "FRED/HOUSTNSA" #New Privately-Owned Housing Units Started: Total Units
HSNG <- Quandl(ticker, type="ts",start_date=startd, end_date=endd)
# Braces group the plot and abline calls so they render as one figure
{plot(HSNG) 
abline(v = ts(c(2000,2005,2010,2015)),col = "red") # v is for vertical lines
abline(v = ts(c(2001,2002,2003,2004)), col = "blue", lty = 2)}

# Evidence of seasonality


### Fit SARIMA Model
library(forecast)
auto.arima(HSNG, seasonal = TRUE)
## Series: HSNG 
## ARIMA(1,1,1)(0,1,2)[12] 
## 
## Coefficients:
##           ar1      ma1     sma1     sma2
##       -0.2223  -0.3329  -0.6617  -0.1001
## s.e.   0.1168   0.1079   0.0783   0.0768
## 
## sigma^2 = 77.14:  log likelihood = -735.63
## AIC=1481.27   AICc=1481.57   BIC=1497.86
# We have a model with ARIMA order (p=1,d=1,q=1), seasonal order (P=0,D=1,Q=2), and seasonal lag S=12

Examine Model Residual

# Examine residuals of the auto-selected ARIMA model for GDP growth.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs

library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65
### Ensure errors white noise. 
# Ideally, residuals should look like white noise, meaning they are 
# serially uncorrelated with constant variance.
# We will use tsdisplay to check the residuals for our optimal model
# (it plots the residual series together with its ACF and PACF).
tsdisplay(residuals(fit), lag.max=45, main='Model Residuals')

# Interpret: The ACF and PACF indicate little to no persistence.  The time series plot shows little remaining patterns 
# to the data. 

Ljung–Box-Residual Indep.

# Ljung-Box test for residual independence of the fitted ARIMA model.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs

library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65
### Compute the Ljung–Box test statistic for examining the null hypothesis of independence in a given time series. 
# lag=4 tests the first 4 autocorrelations jointly
Box.test(residuals(fit),lag=4,type="Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(fit)
## X-squared = 0.0055806, df = 4, p-value = 1
# Ho: No autocorrelation
#Interpret: Given the high p-value, we fail to reject the null of no persistence through 4 lags. 

Kolmogorov-Smirnov Test

# Kolmogorov-Smirnov normality test on the ARIMA residuals.
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(Quandl)
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly"
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs

library(forecast)
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65
### Let's also test the null of normal distribution via a KS test
# NOTE(review): pnorm here is the *standard* normal (mean 0, sd 1), while the
# residuals have a much smaller standard deviation, so the test rejects on
# scale alone. Consider ks.test(scale(residuals(fit)), pnorm) to test shape.
ks.test(residuals(fit),pnorm)
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  residuals(fit)
## D = 0.49566, p-value < 2.2e-16
## alternative hypothesis: two-sided
#Interpret: Given the low pvalue, we reject the null, implying that the residuals aren't normally distributed

Quantile-Quantile Plot

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): unused in the Quandl() call below — confirm intent
# Real GDP (FRED/GDPC1) as xts, converted to a log growth rate
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs

library(forecast)
# Automatic ARIMA order selection; residuals are inspected with a QQ plot below
fit <- auto.arima(GDPGrowth)
fit
## Series: GDPGrowth 
## ARIMA(2,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     ar2   mean
##       0.3292  0.1861  0.006
## s.e.  0.0921  0.0924  0.001
## 
## sigma^2 = 2.89e-05:  log likelihood = 427.76
## AIC=-847.52   AICc=-847.15   BIC=-836.65
### Visual normality check: plot the residual quantiles against theoretical normal
### quantiles, then overlay the reference line through the first and third quartiles.
res <- residuals(fit)
qqnorm(res)
qqline(res)

Model Comparison

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"1990-01-01"
endd<-"2018-01-01"
freq<-"quarterly" # NOTE(review): unused in the Quandl() call below — confirm intent
# Real GDP (FRED/GDPC1) as xts, converted to a log growth rate
GDP <- Quandl("FRED/GDPC1", type="xts",start_date=startd, end_date=endd)
GDPGrowth <- diff(log(GDP)) # calculate log growth rate
GDPGrowth <- na.omit(GDPGrowth) # get rid of the NAs
library(forecast)
# Candidate model 1: fix the orders explicitly rather than using auto.arima()
Model1<-Arima(GDPGrowth, order = c(1, 0, 0)) # fit an AR(1) model 
Model1 # print estimates and AIC; compared against Model2 below
## Series: GDPGrowth 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1   mean
##       0.4050  6e-03
## s.e.  0.0858  9e-04
## 
## sigma^2 = 2.97e-05:  log likelihood = 425.77
## AIC=-845.54   AICc=-845.32   BIC=-837.39
# Candidate model 2; its AIC is compared against Model1's below (lower AIC wins)
Model2<-Arima(GDPGrowth, order = c(0, 0, 3)) # fit an MA(3) model 
Model2
## Series: GDPGrowth 
## ARIMA(0,0,3) with non-zero mean 
## 
## Coefficients:
##          ma1     ma2     ma3    mean
##       0.3227  0.2925  0.1340  0.0061
## s.e.  0.0921  0.0957  0.0816  0.0009
## 
## sigma^2 = 2.937e-05:  log likelihood = 427.39
## AIC=-844.78   AICc=-844.21   BIC=-831.18

Notice that the AIC of Model1 is -845.54, which is smaller (more negative) than Model2’s AIC of -844.78. Since a lower AIC indicates a better trade-off between goodness of fit and model complexity, Model1 (the AR(1)) is preferred over Model2 (the MA(3)) by this criterion.

Test for ARCH Effect

The ARCH effect refers to serial correlation in the conditional variance of a series: periods of high volatility tend to cluster together, so the heteroskedasticity itself is autocorrelated over time.

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward. 
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)

# Weekly JPM simple returns, as xts
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts") # Careful here.  The `rdiff` provides the simple return.  We should convert to log returns since we are running a regression. 
colnames(JPM )[1]="SimpleRet" # Add column name 
JPM$LogRet<-log(1+JPM $SimpleRet) # log return = log(1 + simple return)

The ARCH model assumes that the conditional mean of the error term in a time series model is constant (zero), but its conditional variance is not.

### Get Residual Square
library(dplyr)
reg<-lm(JPM$LogRet~1) # demean the return series by regressing it on a constant only
JPM$DMLogRet<-resid(reg) # get residuals, i.e. the de-meaned log return
JPM$Sq_DMLogRet<-JPM$DMLogRet^2 # squared residuals (proxy for the conditional variance)
acf(JPM$Sq_DMLogRet) # use the ACF to see if there appears to be persistence in the squared residuals

### Engle's ARCH LM test
# Engle's ARCH LM test is the most commonly applied standard test to detect autoregressive conditional heteroscedasticity. We start with the regression of squared residuals upon lagged squared residuals 
# (lag.xts() introduces one leading NA, so lm() drops the first observation)
ARCH <- lm(JPM$Sq_DMLogRet~lag.xts(JPM$Sq_DMLogRet,k=1))
summary(ARCH)
## 
## Call:
## lm(formula = JPM$Sq_DMLogRet ~ lag.xts(JPM$Sq_DMLogRet, k = 1))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0031486 -0.0009597 -0.0007224  0.0001594  0.0143211 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.0009616  0.0001200   8.012 1.17e-14 ***
## lag.xts(JPM$Sq_DMLogRet, k = 1) 0.1558554  0.0486171   3.206  0.00145 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.002168 on 413 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.02428,    Adjusted R-squared:  0.02192 
## F-statistic: 10.28 on 1 and 413 DF,  p-value: 0.001452
# Calculate the LM test statistic: LM = (number of observations) * R^2 of the
# auxiliary regression, distributed chi-squared(q) under the null of no ARCH.
library(broom)
RSq_ARCH <- glance(ARCH)[[1]] # grab the R square value of ARCH model
L <- length(JPM$Sq_DMLogRet) # lengths for data
# NOTE(review): L counts the full series, but the auxiliary regression drops one
# observation to the lag; conventions differ — confirm (L-1-q) vs (L-q) is intended.
q <- length(coef(ARCH))-1 # degrees of freedom q (number of lag terms)
LM <- (L-q)*RSq_ARCH # Compute the LM stat as (L-q)*Rsquare
LM
## [1] 10.07604
# Calculate the critical value
alpha <- 0.05 # set significance levels
Critical <- qchisq(1-alpha, q) # get chi-squared stat(the arch test stat is a chi-squared)
Critical
## [1] 3.841459
# Interpret: Null hypothesis is white noise (i.e. no ARCH effects).  Test stat is greater than critical value, so we reject, implying ARCH effects exist.  

GARCH

GARCH Model

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward. 
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)

# Weekly JPM simple returns, as xts
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts")
colnames(JPM )[1]="SimpleRet" # Add column name 
# Careful here.  The `rdiff` provides the simple return.  We should convert to log returns since we are running a regression.   
JPM$LogRet<-log(1+JPM $SimpleRet)


### Fit GARCH(1,1) Model
library(rugarch)
# GARCH specification: standard GARCH(1,1) variance, constant mean (ARMA(0,0)),
# normally distributed innovations
garchSpec <- ugarchspec(variance.model=list(model="sGARCH",garchOrder=c(1,1)),
                        mean.model=list(armaOrder=c(0,0)), 
                        distribution.model="norm")
# Estimate coefficients by maximum likelihood
garchFit <- ugarchfit(spec=garchSpec, data=JPM$LogRet)
garchFit # print the full diagnostic report (transcribed below)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : sGARCH(1,1)
## Mean Model   : ARFIMA(0,0,0)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error  t value Pr(>|t|)
## mu      0.003194    0.001534   2.0819 0.037352
## omega   0.000066    0.000051   1.2876 0.197882
## alpha1  0.063833    0.033107   1.9281 0.053843
## beta1   0.873420    0.075063  11.6358 0.000000
## 
## Robust Standard Errors:
##         Estimate  Std. Error  t value Pr(>|t|)
## mu      0.003194    0.001503  2.12504 0.033583
## omega   0.000066    0.000075  0.88779 0.374655
## alpha1  0.063833    0.050937  1.25317 0.210144
## beta1   0.873420    0.113811  7.67428 0.000000
## 
## LogLikelihood : 834.5133 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike       -3.9929
## Bayes        -3.9541
## Shibata      -3.9930
## Hannan-Quinn -3.9775
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.7315  0.3924
## Lag[2*(p+q)+(p+q)-1][2]    1.2946  0.4118
## Lag[4*(p+q)+(p+q)-1][5]    3.5249  0.3195
## d.o.f=0
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.2204  0.6387
## Lag[2*(p+q)+(p+q)-1][5]    1.3984  0.7649
## Lag[4*(p+q)+(p+q)-1][9]    2.3902  0.8537
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]   0.09997 0.500 2.000  0.7519
## ARCH Lag[5]   0.62060 1.440 1.667  0.8475
## ARCH Lag[7]   1.14186 2.315 1.543  0.8894
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  0.9341
## Individual Statistics:             
## mu     0.1905
## omega  0.1325
## alpha1 0.4935
## beta1  0.2145
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.07 1.24 1.6
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value    prob sig
## Sign Bias           1.2238 0.22172    
## Negative Sign Bias  1.2018 0.23014    
## Positive Sign Bias  0.3963 0.69208    
## Joint Effect       10.7865 0.01294  **
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     21.98       0.2852
## 2    30     36.88       0.1492
## 3    40     41.88       0.3468
## 4    50     48.66       0.4867
## 
## 
## Elapsed time : 0.06580114
# Interpret: The alpha and beta (ARCH and GARCH terms) appear significant. The Ljung-Box tests appear to indicate no remaining persistence in either the residuals or the squared residuals. The LM tests suggest no remaining ARCH effects. 

GJR-GARCH Model

Let’s try to fit another popular GARCH model; one with a threshold variable controlling the level of volatility (i.e. GJRGARCH). See here http://www.scienpress.com/upload/jsem/vol%202_3_6.pdf for a nice general listing of GARCH variants.

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd<-"2010-01-01" #We want from Jan2010 forward. 
endd<-"2018-01-01"
freq<-"weekly"
trans<-"rdiff" # calculate simple return
ticker <- "WIKI/JPM.11" # we load the JPM closed price data (the 11th column)

# Weekly JPM simple returns, as xts
JPM <- Quandl(ticker,transform=trans,start_date=startd, end_date=endd, collapse=freq,type="xts")
colnames(JPM )[1]="SimpleRet" # Add column name 
# Careful here.  The `rdiff` provides the simple return.  We should convert to log returns since we are running a regression.   
JPM$LogRet<-log(1+JPM $SimpleRet)

### Fit GJR-GARCH(1,1) Model
library(rugarch)
# GJR-GARCH specification via the fGARCH family: adds a threshold (leverage) term
# so negative shocks can raise volatility more than positive shocks of equal size
garchSpecGJR <- ugarchspec(variance.model=list(model="fGARCH", garchOrder=c(1,1), submodel="GJRGARCH"),
                           mean.model=list(armaOrder=c(0,0)), 
                           distribution.model="norm")
# Estimate coefficients by maximum likelihood
gjrgarchFit <- ugarchfit(spec=garchSpecGJR, data=JPM$LogRet)
gjrgarchFit # print the full diagnostic report (transcribed below)
## 
## *---------------------------------*
## *          GARCH Model Fit        *
## *---------------------------------*
## 
## Conditional Variance Dynamics    
## -----------------------------------
## GARCH Model  : fGARCH(1,1)
## fGARCH Sub-Model : GJRGARCH
## Mean Model   : ARFIMA(0,0,0)
## Distribution : norm 
## 
## Optimal Parameters
## ------------------------------------
##         Estimate  Std. Error  t value Pr(>|t|)
## mu      0.002505    0.001494   1.6766 0.093622
## omega   0.000096    0.000069   1.3984 0.161978
## alpha1  0.042470    0.031481   1.3491 0.177313
## beta1   0.822647    0.101647   8.0932 0.000000
## eta11   0.999806    0.499220   2.0027 0.045206
## 
## Robust Standard Errors:
##         Estimate  Std. Error    t value Pr(>|t|)
## mu      0.002505    0.001563    1.60317 0.108898
## omega   0.000096    0.000140    0.68351 0.494284
## alpha1  0.042470    0.043749    0.97078 0.331660
## beta1   0.822647    0.207661    3.96149 0.000074
## eta11   0.999806    0.000411 2433.92991 0.000000
## 
## LogLikelihood : 841.6567 
## 
## Information Criteria
## ------------------------------------
##                     
## Akaike       -4.0224
## Bayes        -3.9739
## Shibata      -4.0227
## Hannan-Quinn -4.0032
## 
## Weighted Ljung-Box Test on Standardized Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                     0.4094  0.5223
## Lag[2*(p+q)+(p+q)-1][2]    1.0424  0.4847
## Lag[4*(p+q)+(p+q)-1][5]    3.5594  0.3143
## d.o.f=0
## H0 : No serial correlation
## 
## Weighted Ljung-Box Test on Standardized Squared Residuals
## ------------------------------------
##                         statistic p-value
## Lag[1]                    0.08875  0.7658
## Lag[2*(p+q)+(p+q)-1][5]   2.24595  0.5615
## Lag[4*(p+q)+(p+q)-1][9]   3.61417  0.6548
## d.o.f=2
## 
## Weighted ARCH LM Tests
## ------------------------------------
##             Statistic Shape Scale P-Value
## ARCH Lag[3]    0.3012 0.500 2.000  0.5831
## ARCH Lag[5]    0.8767 1.440 1.667  0.7699
## ARCH Lag[7]    1.6946 2.315 1.543  0.7816
## 
## Nyblom stability test
## ------------------------------------
## Joint Statistic:  0.9148
## Individual Statistics:              
## mu     0.09678
## omega  0.08451
## alpha1 0.27392
## beta1  0.16032
## eta11  0.27395
## 
## Asymptotic Critical Values (10% 5% 1%)
## Joint Statistic:          1.28 1.47 1.88
## Individual Statistic:     0.35 0.47 0.75
## 
## Sign Bias Test
## ------------------------------------
##                    t-value    prob sig
## Sign Bias           1.7138 0.08731   *
## Negative Sign Bias  0.1439 0.88561    
## Positive Sign Bias  0.3059 0.75984    
## Joint Effect        7.3349 0.06196   *
## 
## 
## Adjusted Pearson Goodness-of-Fit Test:
## ------------------------------------
##   group statistic p-value(g-1)
## 1    20     21.02       0.3357
## 2    30     31.12       0.3600
## 3    40     39.77       0.4357
## 4    50     55.15       0.2533
## 
## 
## Elapsed time : 0.268625

VAR

The vector auto-regression (VAR) model extends the idea of uni-variate auto-regression to k time series regressions, where the lagged values of all k series appear as regressors. On the one hand, economic elements like GDP, investment and consumer spending all depend upon interest rates. On the other hand, the level of interest rates is also set, in part, by the prospects for economic growth and inflation. Hence, we need a VAR model.

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"  
endd <-"2012-12-31"

# Monthly 3-month T-bill rate, monthly 10-year Treasury yield, quarterly real GDP
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression

TSpread <- TB10YS - TB3MS # term spread: long rate minus short rate
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annualized log growth rate in percent (4 quarters * 100)

# Visual inspection
plot(cbind(GDPGrowth, TSpread),xlab = "Date", main='RGDP growth and Term spread')

library(vars)
### Step 1: Model selection (use information criteria to decide upon the number of lags to include)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2
# Interpret: All the information criteria suggest using lags = 2 --> we need to set p=2 when estimating the model


### Step 2: Estimate VAR model with 2 lags and a constant
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: GDPGrowth, TSpread 
## Deterministic variables: const 
## Sample size: 128 
## Log Likelihood: -381.122 
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
## 
## 
## Estimation results for equation GDPGrowth: 
## ========================================== 
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##              Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.29512    0.08356   3.532 0.000582 ***
## TSpread.l1   -0.87548    0.36949  -2.369 0.019373 *  
## GDPGrowth.l2  0.21663    0.08173   2.651 0.009089 ** 
## TSpread.l2    1.29865    0.37148   3.496 0.000658 ***
## const         0.48692    0.47198   1.032 0.304256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111,  Adjusted R-squared: 0.2887 
## F-statistic: 13.88 on 4 and 123 DF,  p-value: 2.246e-09 
## 
## 
## Estimation results for equation TSpread: 
## ======================================== 
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.009302   0.017077   0.545 0.586944    
## TSpread.l1    1.057695   0.075515  14.006  < 2e-16 ***
## GDPGrowth.l2 -0.056374   0.016703  -3.375 0.000988 ***
## TSpread.l2   -0.218659   0.075920  -2.880 0.004691 ** 
## const         0.454647   0.096461   4.713 6.48e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304,  Adjusted R-squared: 0.8249 
## F-statistic: 150.6 on 4 and 123 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   5.86218 0.05959
## TSpread     0.05959 0.24486
## 
## Correlation matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   1.00000 0.04974
## TSpread     0.04974 1.00000

Our estimate functions are:

\[\begin{align} GDPgrowth_t &= c_1 + \alpha_{11}GDPGrowth_{t-1} + \alpha_{12}TSpread_{t-1} + \alpha_{13}GDPGrowth_{t-2} + \alpha_{14}TSpread_{t-2} ~~~(1)\\ TSpread_t &= c_2 + \alpha_{21}GDPGrowth_{t-1} + \alpha_{22}TSpread_{t-1} + \alpha_{23}GDPGrowth_{t-2} + \alpha_{24}TSpread_{t-2} ~~~(2) \end{align}\]

In this first equation, the a11 coefficient = 0.295 implies that for every one unit increase in the last quarter GDP growth rate, GDP this quarter will rise by .295, holding constant the dynamic effects of prior GDP growth and the Term Spread. Meanwhile, the a12 coefficient = -0.875 implies that every one unit increase in the last quarter Term Spread will cause GDP growth this quarter to fall by .875 units.

Now Let’s look at the second equation. a21 = 0.009 implies that for every one unit increase in the last quarter GDP growth rate, Term Spread this quarter will rise by .009. Likewise, the a22 = 1.058 implies that a unit increase in the last quarter Term Spread will cause Term Spread this quarter to rise by 1.058 units.

However, since each coefficient in the VAR model only reflects a partial dynamic relationship and cannot capture a comprehensive dynamic relationship, we may need other tools such as Granger causality test, IRF impulse response function to help us understand the relationships.

# Obtain the adj. R^2 for each equation from the output of 'VAR()'
summary(VAR_fit$varresult$GDPGrowth)$adj.r.squared
## [1] 0.2886718
summary(VAR_fit$varresult$TSpread)$adj.r.squared
## [1] 0.824881
# Multi-step forecast: 15 quarters ahead with 95% confidence bands
predictions <- predict(VAR_fit,n.ahead = 15, ci = 0.95)
predictions
## $GDPGrowth
##           fcst     lower    upper       CI
##  [1,] 1.198711 -3.546741 5.944163 4.745452
##  [2,] 1.383492 -3.624828 6.391812 5.008320
##  [3,] 1.734391 -3.469314 6.938096 5.203705
##  [4,] 2.045169 -3.266459 7.356797 5.311628
##  [5,] 2.304776 -3.077621 7.687172 5.382396
##  [6,] 2.514830 -2.924988 7.954647 5.439817
##  [7,] 2.674189 -2.814666 8.163044 5.488855
##  [8,] 2.787256 -2.742916 8.317427 5.530171
##  [9,] 2.860470 -2.702736 8.423677 5.563207
## [10,] 2.901204 -2.686752 8.489160 5.587956
## [11,] 2.916929 -2.688342 8.522199 5.605271
## [12,] 2.914597 -2.701976 8.531170 5.616573
## [13,] 2.900280 -2.723177 8.523737 5.623457
## [14,] 2.878988 -2.748391 8.506367 5.627379
## [15,] 2.854633 -2.774856 8.484122 5.629489
## 
## $TSpread
##           fcst       lower    upper        CI
##  [1,] 1.805240  0.83538762 2.775093 0.9698527
##  [2,] 2.015903  0.60191951 3.429887 1.4139838
##  [3,] 2.137420  0.47220986 3.802629 1.6652097
##  [4,] 2.212731  0.37280391 4.052657 1.8399267
##  [5,] 2.248926  0.29235971 4.205491 1.9565658
##  [6,] 2.255636  0.22410116 4.287172 2.0315352
##  [7,] 2.242139  0.16481529 4.319463 2.0773237
##  [8,] 2.216036  0.11229197 4.319781 2.1037443
##  [9,] 2.183447  0.06528336 4.301611 2.1181637
## [10,] 2.148992  0.02326201 4.274722 2.1257301
## [11,] 2.115927 -0.01381595 4.245670 2.1297428
## [12,] 2.086338 -0.04576626 4.218441 2.1321039
## [13,] 2.061363 -0.07240850 4.195135 2.1337716
## [14,] 2.041416 -0.09372608 4.176558 2.1351420
## [15,] 2.026388 -0.10993889 4.162715 2.1363268
# Plot point forecasts with 95% CI fan for each variable separately
plot(predictions, names = "GDPGrowth")

plot(predictions, names = "TSpread")

Test the Direction of Causality

Test the Direction of Causality Using causality()

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"  
endd <-"2012-12-31"

# Monthly 3-month T-bill rate, monthly 10-year Treasury yield, quarterly real GDP
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression

TSpread <- TB10YS - TB3MS # term spread: long rate minus short rate
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annualized log growth rate in percent

### VAR Model (same specification as the previous section: 2 lags, constant)
library(vars)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: GDPGrowth, TSpread 
## Deterministic variables: const 
## Sample size: 128 
## Log Likelihood: -381.122 
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
## 
## 
## Estimation results for equation GDPGrowth: 
## ========================================== 
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##              Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.29512    0.08356   3.532 0.000582 ***
## TSpread.l1   -0.87548    0.36949  -2.369 0.019373 *  
## GDPGrowth.l2  0.21663    0.08173   2.651 0.009089 ** 
## TSpread.l2    1.29865    0.37148   3.496 0.000658 ***
## const         0.48692    0.47198   1.032 0.304256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111,  Adjusted R-squared: 0.2887 
## F-statistic: 13.88 on 4 and 123 DF,  p-value: 2.246e-09 
## 
## 
## Estimation results for equation TSpread: 
## ======================================== 
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.009302   0.017077   0.545 0.586944    
## TSpread.l1    1.057695   0.075515  14.006  < 2e-16 ***
## GDPGrowth.l2 -0.056374   0.016703  -3.375 0.000988 ***
## TSpread.l2   -0.218659   0.075920  -2.880 0.004691 ** 
## const         0.454647   0.096461   4.713 6.48e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304,  Adjusted R-squared: 0.8249 
## F-statistic: 150.6 on 4 and 123 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   5.86218 0.05959
## TSpread     0.05959 0.24486
## 
## Correlation matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   1.00000 0.04974
## TSpread     0.04974 1.00000
### Granger Causality Tests
# H0: GDPGrowth does not Granger-cause TSpread (GDPGrowth is not the causal variable) <=> H0: a21 = a23 = 0
causality(VAR_fit, cause = "GDPGrowth")$Granger
## 
##  Granger causality H0: GDPGrowth do not Granger-cause TSpread
## 
## data:  VAR object VAR_fit
## F-Test = 6.1452, df1 = 2, df2 = 246, p-value = 0.002487
# H0: TSpread does not Granger-cause GDPGrowth (TSpread is not the causal variable) <=> H0: a12 = a14 = 0
causality(VAR_fit, cause = "TSpread")$Granger
## 
##  Granger causality H0: TSpread do not Granger-cause GDPGrowth
## 
## data:  VAR object VAR_fit
## F-Test = 7.1708, df1 = 2, df2 = 246, p-value = 0.00094
# Interpret: both p-values are below 0.05, so each variable Granger-causes the other.

Test the Direction of Causality Using linearHypothesis()

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"  
endd <-"2012-12-31"

# Monthly 3-month T-bill rate, monthly 10-year Treasury yield, quarterly real GDP
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression

TSpread <- TB10YS - TB3MS # term spread: long rate minus short rate
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annualized log growth rate in percent

### Model selection
# NOTE(review): VARselect() comes from the vars package, loaded in a previous section;
# rm(list=ls()) clears objects but not attached packages, so this works when the file
# is run top-to-bottom — confirm if sections are meant to be standalone.
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2
# Interpret: All the information criteria suggest using lags = 2 --> we need to set p=2 when estimating the model


### Estimate VAR equations separately by OLS (equation-by-equation OLS is equivalent to VAR())
library(dynlm)
VAR_EQ1<-dynlm(GDPGrowth ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2), start=c(1981,1), end=c(2012,4))
VAR_EQ2<-dynlm(TSpread ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2), start=c(1981,1), end=c(2012,4))
# Rename regressors for better readability
names(VAR_EQ1$coefficients) <- c("Intercept","Growth_t-1", "Growth_t-2", "TSpread_t-1", "TSpread_t-2")
names(VAR_EQ2$coefficients) <- names(VAR_EQ1$coefficients)

# Obtain heteroskedasticity-robust (sandwich) coefficient tests
library(lmtest)
library(sandwich)
coeftest(VAR_EQ1, vcov. = sandwich)
## 
## t test of coefficients:
## 
##              Estimate Std. Error t value Pr(>|t|)   
## Intercept    0.486924   0.523771  0.9297 0.354373   
## Growth_t-1   0.295117   0.109974  2.6835 0.008288 **
## Growth_t-2   0.216634   0.086355  2.5086 0.013421 * 
## TSpread_t-1 -0.875477   0.361088 -2.4246 0.016781 * 
## TSpread_t-2  1.298647   0.395225  3.2858 0.001325 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Robust (sandwich) coefficient tests for the TSpread equation
coeftest(VAR_EQ2, vcov. = sandwich)
## 
## t test of coefficients:
## 
##               Estimate Std. Error t value  Pr(>|t|)    
## Intercept    0.4546467  0.1214656  3.7430 0.0002778 ***
## Growth_t-1   0.0093019  0.0218258  0.4262 0.6707140    
## Growth_t-2  -0.0563737  0.0266037 -2.1190 0.0360996 *  
## TSpread_t-1  1.0576951  0.0984832 10.7399 < 2.2e-16 ***
## TSpread_t-2 -0.2186588  0.1088367 -2.0091 0.0467207 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### Granger causality tests via joint restriction on the lag coefficients
library(car)
# H0: TSpread does not Granger-cause GDPGrowth, i.e. both TSpread lag coefficients
# in the GDPGrowth equation are jointly zero (robust covariance via sandwich)
linearHypothesis(VAR_EQ1, 
                 hypothesis.matrix = c("TSpread_t-1", "TSpread_t-2"),
                 vcov. = sandwich)
## Linear hypothesis test
## 
## Hypothesis:
## TSpread_t - 0
## TSpread_t - 2 = 0
## 
## Model 1: restricted model
## Model 2: GDPGrowth ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2)
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F   Pr(>F)   
## 1    125                      
## 2    123  2 5.5884 0.004753 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# H0: GDPGrowth does not Granger-cause TSpread, i.e. both GDPGrowth lag coefficients
# in the TSpread equation are jointly zero (robust covariance via sandwich)
linearHypothesis(VAR_EQ2, 
                 hypothesis.matrix = c("Growth_t-1", "Growth_t-2"),
                 vcov. = sandwich)
## Linear hypothesis test
## 
## Hypothesis:
## Growth_t - 0
## Growth_t - 2 = 0
## 
## Model 1: restricted model
## Model 2: TSpread ~ L(GDPGrowth, 1:2) + L(TSpread, 1:2)
## 
## Note: Coefficient covariance matrix supplied.
## 
##   Res.Df Df      F  Pr(>F)  
## 1    125                    
## 2    123  2 3.3739 0.03746 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpret: Both Granger causality tests reject at the 5% level; this is evidence in favor of the conjecture that the term spread has power in explaining GDP growth, and vice versa

Impulse Response Function

rm(list=ls()) # clear workspace (per-section reset; tutorial convention for this file)
cat("\014")  # clear console
library(Quandl)
# Register the Quandl API key stored alongside the script
Quandl_key = read.csv("quandlkey.csv",stringsAsFactors=FALSE)$Key
Quandl.api_key(Quandl_key)
startd <-"1980-04-01"  
endd <-"2012-12-31"

# Monthly 3-month T-bill rate, monthly 10-year Treasury yield, quarterly real GDP
TB3MS <- Quandl("FRED/TB3MS",start_date=startd, end_date=endd,type="ts")
TB10YS <- Quandl("FRED/GS10",start_date=startd, end_date=endd,type="ts")
GDP <- Quandl("FRED/GDPC96",start_date=startd, end_date=endd,type="ts", transform="rdiff")# note this is simple return, we need log return since we want to run regression

TSpread <- TB10YS - TB3MS # term spread: long rate minus short rate
TSpread <- aggregate(TSpread,nfrequency=4,FUN=mean)# aggregate monthly data to quarterly(averages)
GDPGrowth = 400*log(1+GDP)# annualized log growth rate in percent


### VAR Model (same specification as the previous sections: 2 lags, constant)
library(vars)
VAR_Data<-na.omit(ts.union(GDPGrowth, TSpread)) #set up data for estimation
VARselect(VAR_Data, lag.max = 12, type = "const")$selection #obtain the best lag period
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2
VAR_fit<-VAR(y = VAR_Data, p = 2)
summary(VAR_fit)
## 
## VAR Estimation Results:
## ========================= 
## Endogenous variables: GDPGrowth, TSpread 
## Deterministic variables: const 
## Sample size: 128 
## Log Likelihood: -381.122 
## Roots of the characteristic polynomial:
## 0.8208 0.8208 0.1958 0.1958
## Call:
## VAR(y = VAR_Data, p = 2)
## 
## 
## Estimation results for equation GDPGrowth: 
## ========================================== 
## GDPGrowth = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##              Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.29512    0.08356   3.532 0.000582 ***
## TSpread.l1   -0.87548    0.36949  -2.369 0.019373 *  
## GDPGrowth.l2  0.21663    0.08173   2.651 0.009089 ** 
## TSpread.l2    1.29865    0.37148   3.496 0.000658 ***
## const         0.48692    0.47198   1.032 0.304256    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 2.421 on 123 degrees of freedom
## Multiple R-Squared: 0.3111,  Adjusted R-squared: 0.2887 
## F-statistic: 13.88 on 4 and 123 DF,  p-value: 2.246e-09 
## 
## 
## Estimation results for equation TSpread: 
## ======================================== 
## TSpread = GDPGrowth.l1 + TSpread.l1 + GDPGrowth.l2 + TSpread.l2 + const 
## 
##               Estimate Std. Error t value Pr(>|t|)    
## GDPGrowth.l1  0.009302   0.017077   0.545 0.586944    
## TSpread.l1    1.057695   0.075515  14.006  < 2e-16 ***
## GDPGrowth.l2 -0.056374   0.016703  -3.375 0.000988 ***
## TSpread.l2   -0.218659   0.075920  -2.880 0.004691 ** 
## const         0.454647   0.096461   4.713 6.48e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## 
## Residual standard error: 0.4948 on 123 degrees of freedom
## Multiple R-Squared: 0.8304,  Adjusted R-squared: 0.8249 
## F-statistic: 150.6 on 4 and 123 DF,  p-value: < 2.2e-16 
## 
## 
## 
## Covariance matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   5.86218 0.05959
## TSpread     0.05959 0.24486
## 
## Correlation matrix of residuals:
##           GDPGrowth TSpread
## GDPGrowth   1.00000 0.04974
## TSpread     0.04974 1.00000
### IRF (impulse response functions, with bootstrapped confidence bands)
# Consider the response of GDP growth to a term spread shock over 20 quarters
IRF_GDP<- irf(VAR_fit, impulse = "TSpread", response = "GDPGrowth", n.ahead = 20, boot = TRUE)
plot(IRF_GDP, ylab = "GDP growth", main = "Shock from Term spread")

#Interpret: The IRF estimates the effects of a one unit shock to the error in the TSpread on future values of GDPGrowth. For example, IRF_GDP[21]=-0.025 implies that a one unit shock to the error in the TSpread will cause the 20 step-ahead GDPGrowth to decrease by 0.025. Note that a positive shock to the Term spread has an immediate negative impact on GDP growth, with growth falling to -0.432. By about the 5th quarter growth turns positive again and by the 20th quarter the impact of the shock is largely dissipated.  


# Consider the reverse direction: the response of the term spread to a GDP growth shock
IRF_TSpread <- irf(VAR_fit, impulse="GDPGrowth", response="TSpread", n.ahead = 20, boot = TRUE)
plot(IRF_TSpread, ylab = "Term spread", main = "Shock from GDP growth")

Event Study

### Housekeeping
rm(list=ls()) # clear workspace
cat("\014")  # clear console
# Load libraries. NOTE: load order matters for function masking
# (e.g. plyr vs dplyr from tidyverse), so it is kept as-is.
library(rvest) # scrape tables out of html pages
library(Quandl)
library(quantmod)
library(PerformanceAnalytics)
library(here)
library("readxl")
library(tidyverse)
library(data.table)
library(plyr)
library(ggplot2)

# Fetch the Dow Jones Industrial Average constituents table from Wikipedia.
url <- "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"
page <- xml2::read_html(url)
constituents_node <- html_nodes(page, xpath='//*[@id="constituents"]')
DOWJIA <- html_table(constituents_node)

DOWJIA <- DOWJIA[[1]]          # first (and only) matched table
DOW_Tickers <- DOWJIA$Symbol   # ticker symbols used throughout the study

Load Data

### load data (Dow 30 Constituents)
# Prepend the S&P 500 index (^GSPC) as the market proxy for the CAPM fit.
Tickers<-c("^GSPC",DOW_Tickers)
Rf_Tickers<-'DTB4WK' # 4-week T-bill (annualized percent) as the risk-free proxy

startd = "2018-12-01"
endd = "2021-04-15"

# pull stock price from yahoo 
getSymbols(Tickers,from=startd,to=endd,src='yahoo')
##  [1] "^GSPC" "MMM"   "AXP"   "AMGN"  "AAPL"  "BA"    "CAT"   "CVX"   "CSCO" 
## [10] "KO"    "DIS"   "DOW"   "GS"    "HD"    "HON"   "IBM"   "INTC"  "JNJ"  
## [19] "JPM"   "MCD"   "MRK"   "MSFT"  "NKE"   "PG"    "CRM"   "TRV"   "UNH"  
## [28] "VZ"    "V"     "WBA"   "WMT"
getSymbols(Rf_Tickers,src='FRED')
## [1] "DTB4WK"
### stock Return
# Strip punctuation ("^GSPC" -> "GSPC") so names match the objects getSymbols created.
tickers = gsub("[[:punct:]]", "", Tickers)
# Daily arithmetic returns on adjusted close, merged into one wide xts.
Return = do.call(merge,lapply(tickers, function(x) 
        periodReturn(Ad(get(x)),period='daily',type='arithmetic')))
Return = Return[-1,] # drop the first row (return is 0 by construction)
names(Return) = tickers

### Rf
DTB4WK = na.omit(DTB4WK)
Rf = DTB4WK[paste(startd,"/",endd,sep="")] # annual rate
Rf = (Rf/100+1)^(1/252)-1 # convert to daily date, business day
names(Rf) = "Rf"

### merge data
Data = merge(Return,Rf)
Data = na.omit(Data)

### excess return
NumCol = ncol(Data)
# FIX: was Data[,1:NumCol-1], which parses as (1:NumCol)-1 = 0:(NumCol-1)
# and only returned the right columns because the 0 index is silently
# dropped. seq_len makes the intent (all columns except Rf) explicit.
StocksEXRet = Data[,seq_len(NumCol-1)]
# Subtract the daily risk-free rate from every stock column.
for (i in seq_len(ncol(StocksEXRet))){
StocksEXRet[,i]<- StocksEXRet[,i]-Data$Rf
}
Rf = Data$Rf

### log return
LogStocksEXRet = log(1+StocksEXRet)
LogRf = log(1+Rf)

# print data
head(cbind(LogStocksEXRet[,1:4],LogRf))
##                     GSPC           MMM          AXP          AMGN           Rf
## 2019-03-20 -0.0030434855 -0.0035455972 -0.017103361 -0.0020288662 9.450071e-05
## 2019-03-21  0.0106986043  0.0061733709  0.009339165  0.0039246867 9.643767e-05
## 2019-03-22 -0.0192543499 -0.0239964108 -0.021429188 -0.0275174206 9.566300e-05
## 2019-03-25 -0.0009343499 -0.0072058276 -0.003939614 -0.0006842911 9.488817e-05
## 2019-03-26  0.0070628327  0.0195445844  0.004115378  0.0088691184 9.488817e-05
## 2019-03-27 -0.0047500940 -0.0004807677 -0.004855138 -0.0105066826 9.450071e-05
## 2019-03-27 -0.0047500940 -0.0004807677 -0.004855138 -0.0105066826 9.450071e-05

Set Event Dates

# Event-study window parameters (calendar days around the event date).
event_date <- as.Date("2020-01-06")
event_window <- 10      # days on either side of the event
estimate_window <- 250  # length of the pre-event estimation period
postevent_window <- 30  # days tracked after the event window closes
T1 <- event_date - event_window  # event window start
T2 <- event_date + event_window  # event window end
T0 <- T1 - estimate_window       # estimation period start
T3 <- T2 + postevent_window      # post-event period end

### fit CAPM model
# Estimation-period data: T0 through the day before the event window opens.
Estimate_Data = LogStocksEXRet[paste(T0,"/",T1-1,sep="")]

# Regress each stock (columns 2..NumCols) on the market (column 1, GSPC).
# FIX: the original ran the first regression outside the loop and then grew
# Coeff with rbind() inside it; preallocate one matrix and fill it instead.
NumCols = ncol(Estimate_Data)
Coeff = matrix(NA_real_, nrow = NumCols - 1, ncol = 2,
               dimnames = list(NULL, c("Alpha", "Beta")))
for (i in 2:NumCols){
  model<-lm(Estimate_Data[,i]~Estimate_Data[,1])
  Coeff[i-1,] = coef(model)
}

# save betas for all tickers (row order matches DOW_Tickers)
Tickers<-DOW_Tickers
Coeff<-data.frame(Coeff,Tickers)
head(Coeff)
## NOTE: the printout below predates the refactor; the row/column labels now
## read Alpha/Beta but the estimates themselves are identical.
##                       X.Intercept. Estimate_Data...1. Tickers
## model.coefficients   -1.824561e-03          1.1560208     MMM
## model.coefficients.1 -7.993798e-05          1.1140154     AXP
## model.coefficients.2  1.449461e-03          0.6143389    AMGN
## model.coefficients.3  1.142455e-03          1.4775025    AAPL
## model.coefficients.4 -1.518523e-03          0.8504409      BA
## model.coefficients.5 -5.245564e-04          1.2804610     CAT
### predict "normal" return
# CAPM-implied ("normal") excess return for each stock over the event plus
# post-event span: alpha + beta * market return.
Test_data <- LogStocksEXRet[paste(T1,"/",T3,sep="")]
Prediction <- Test_data[,-1] # template with the right dates and columns
for (j in seq_len(ncol(Prediction))) {
  Prediction[, j] <- Coeff[j, 1] + Coeff[j, 2] * Test_data[, 1]
}

### abnormal return
# Abnormal return = realized excess return minus the CAPM prediction.
AR <- Test_data[,-1]
for (j in seq_len(ncol(AR))) {
  AR[, j] <- Test_data[, j + 1] - Prediction[, j]
}

### Cumulative AR
# Running sum of abnormal returns per stock (column-wise cumsum).
CAR <- cumsum(AR)

Plot CAR

### convert to long table
# Wide xts -> data.frame, keeping the date index as an explicit column.
CAR_df = data.frame(CAR)
CAR_df$Date = index(CAR)
# Melt the 30 ticker columns into long (Date, Ticker, CAR) rows; the chained
# data.table step maps melt's factor levels back to the ticker symbols.
CAR_df=melt(setDT(CAR_df), measure.vars=list(c(1:30)), 
     variable.name='Ticker', value.name=c("CAR"))[, 
      Ticker:= DOW_Tickers[Ticker]][order(Date)]

# average CAR across the 30 stocks, one row per date
AvgCAR <- ddply(CAR_df, "Date", summarise, Ticker = "AVERAGE",AvgCAR=mean(CAR))

### plot
# One colored line per ticker, a thick black line for the cross-sectional
# average, and a dashed vertical line marking the event date (Jan 6th).

ggplot(CAR_df, aes(x=Date, y=CAR, group=Ticker, color=Ticker)) +
    geom_line(size=0.8,alpha=0.8)+
    geom_line(data = AvgCAR, aes(x=Date,y=AvgCAR),size = 1.2, color = "black")+
    geom_vline(aes(xintercept = event_date),linetype="dashed",color = "darkred",size=1.5)+
    geom_hline(aes(yintercept = 0),linetype="dashed",color = "darkred",size=1.1)+
    annotate(geom="text", x=event_date+10, y=0.4, label="Jan6th",fontface =2,
           size =8,alpha = 0.8,color="darkred")+
    scale_x_date(date_breaks = "5 days", date_labels = "%b %d")+
    ggtitle("Cumulative Abnormal Log Excess Return")+ylab("CAR")

# CAR at the end of the event window (T2), one bar per ticker.
d<-data.frame(CAR=as.numeric(t(CAR[T2])))
d$Ticker = DOW_Tickers

titletoplot = paste0("CAR ",event_window," days after Jan6th")
ggplot(data = d,aes(x = Ticker, y=CAR,fill=Ticker))+
  geom_bar(stat="identity",alpha=0.5)+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  labs(y="CAR",x='Ticker',title = titletoplot)

Summarize J Stats Across All Assets

### calculate J-stat
# Cross-sectional average CAR over the event window [T1, T2].
Avg_event_CAR = rowMeans(CAR[paste(T1,"/",T2,sep="")])
JCAR = tail(Avg_event_CAR,1) # average CAR at the close of the event window
event_AR = AR[paste(T1,"/",T2,sep="")]
# Std. error of the average CAR: sqrt(sum of per-stock AR variances) / N.
# FIX: was hard-coded 30^2; use the actual number of stocks so the statistic
# stays correct if the ticker list changes (here ncol = 30, value unchanged).
NumStocks = ncol(event_AR)
Jsigma = sqrt(sum(sapply(event_AR,FUN=var))/NumStocks^2)

# Two-sided normal test of H0: average CAR = 0.
Jstat = JCAR/Jsigma
pvalues = pnorm(q=abs(Jstat),lower.tail = FALSE)*2

# print result
print(cbind(CAR = JCAR, Jstat,Pvalue = pvalues))
##             CAR    Jstat      Pvalue
## [1,] -0.0046184 -3.13541 0.001716142

Classification

Simple Logistic

Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
library(xts)
# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels ("lin" = no transform)
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# Combine both series into one frame indexed by date (same monthly dates)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)
# Regressor: the spread lagged 4 months — does the curve lead recessions?
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData) # lagging introduces 4 leading NAs
# Logistic regression: P(recession this month) as a function of the lagged spread
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
summary(logit1)
## 
## Call:
## glm(formula = Recession ~ TenTwoLag4, family = "binomial", data = MyData)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9476  -0.5342  -0.4136  -0.2796   2.6292  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5604     0.1633  -9.557  < 2e-16 ***
## TenTwoLag4   -0.8068     0.1715  -4.704 2.55e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 367.66  on 536  degrees of freedom
## Residual deviance: 342.96  on 535  degrees of freedom
## AIC: 346.96
## 
## Number of Fisher Scoring iterations: 5

Convert Log Odds to Prob

Reference: https://sebastiansauer.github.io/convert_logit2prob/

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)

library(xts)
MyData<-as.xts(MyData)
# Regressor: spread lagged 4 months; na.omit drops the 4 leading NAs
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
# Fit the same simple recession logit as the previous section
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
# Convert log-odds to a probability via the logistic CDF.
#
# Args:
#   logit: numeric vector of log-odds values.
# Returns: numeric vector of probabilities in [0, 1].
logit2prob <- function(logit){
  # plogis(x) equals exp(x) / (1 + exp(x)) but is numerically stable:
  # the hand-rolled ratio overflows to Inf/Inf = NaN for logit > ~709.
  prob <- plogis(logit)
  return(prob)
}
# Transform both fitted coefficients from the log-odds scale to probabilities
logit2prob(coef(logit1))
## (Intercept)  TenTwoLag4 
##   0.1735834   0.3085671

Here the transformed value of 0.3 (for TenTwoLag4) is read as a 30% probability.

Logit Marginal Effects

Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
library(xts)
# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
newdata<-data.frame(TenTwoLag4=tail(TenTwo$value,n=1)) #last observed value of 10-2yr
PRED<-predict(logit1, newdata = newdata, type = 'response')
coeffs<-coef(logit1)
TenTwoMean=mean(TenTwo$value)
# Marginal effect at the mean: dP/dx = lambda(XB) * b1, where lambda is the
# logistic density evaluated at the fitted linear predictor XB = b0 + b1*xbar.
# FIX: the original evaluated the density at the raw regressor mean
# (exp(-xbar)/(1+exp(-xbar))^2) instead of at the linear predictor, which
# understates/misstates the effect. dlogis(x) = exp(-x)/(1+exp(-x))^2.
XB = coeffs[1] + coeffs[2]*TenTwoMean
MgrlEffectTenTwo = dlogis(XB)*coeffs[2]
print(paste('For every 1 percentage point (i.e. 100bps) increase in the 10-2yr spread, the probability of a recession changes by ', MgrlEffectTenTwo,' %'))
## NOTE: the value below was produced before the marginal-effect fix above.
## [1] "For every 1 percentage point (i.e. 100bps) increase in the 10-2yr spread, the probability of a recession changes by  -0.163825499802387  %"

Logit Confusion Matrix

Use a logistic model to model recessions with 10yr-2yr Treasury Spread

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)

library(xts)
MyData<-as.xts(MyData)
# Regressor: spread lagged 4 months; na.omit drops the 4 leading NAs
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')

# In-sample fitted recession probabilities
Pred<-predict(logit1,MyData,type="response")
# Classify as recession when the fitted probability is at least the cutoff
cutoff = .3
Pred2<-ifelse(Pred>=cutoff,1,0)
library(caret)
# caret::confusionMatrix requires factors for both predicted and actual classes
Actual<-factor(MyData$Recession)
Predicted<-factor(Pred2)
C<-confusionMatrix(data=Predicted,reference=Actual)
C
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 477  48
##          1   2  10
##                                           
##                Accuracy : 0.9069          
##                  95% CI : (0.8791, 0.9301)
##     No Information Rate : 0.892           
##     P-Value [Acc > NIR] : 0.148           
##                                           
##                   Kappa : 0.2582          
##                                           
##  Mcnemar's Test P-Value : 1.966e-10       
##                                           
##             Sensitivity : 0.9958          
##             Specificity : 0.1724          
##          Pos Pred Value : 0.9086          
##          Neg Pred Value : 0.8333          
##              Prevalence : 0.8920          
##          Detection Rate : 0.8883          
##    Detection Prevalence : 0.9777          
##       Balanced Accuracy : 0.5841          
##                                           
##        'Positive' Class : 0               
## 

Note: Accuracy = (477+10)/537, i.e. the % of results that are correctly classified. Sensitivity = 477/(477+2), i.e. the % of actual 0’s correctly classified. Specificity = 10/(48+10), i.e. the % of actual 1’s correctly classified.

Logit Prediction

Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical models used.

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
library(xts)
# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)
MyData$TenTwoLag4 <- lag.xts(MyData$TenTwo,k=4)
MyData<-na.omit(MyData)
logit1<-glm(Recession~TenTwoLag4,data=MyData,family = 'binomial')
newdata<-data.frame(TenTwoLag4=tail(TenTwo$value,n=1)) #last observed value of 10-2yr
#newdata
# Model-based predicted probability at the latest observed spread
PRED<-predict(logit1, newdata = newdata, type = 'response')
coeffs<-coef(logit1)
# Same prediction computed manually via the inverse-logit 1/(1+exp(-XB)) —
# should match predict() exactly (sanity check)
ByHand = 1/(1+exp(-(coeffs[1]+coeffs[2]*newdata)))
#ByHand
print(paste('Predicted Probability of Recession given 10-2yr = ', as.numeric(newdata[1])))
## [1] "Predicted Probability of Recession given 10-2yr =  0.78"
print(paste('Via predict ',PRED,' | By Hand ',ByHand))
## [1] "Via predict  0.100673319918696  | By Hand  0.100673319918696"

Yield Curve Recession

Use a logistic to model recessions with 10yr-2yr Treasury Spread. Note: This is a simplified version of the typical implementation. Recession in Next 3 mths = f(10-2yr Treasury Spread @ time t)

rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(fredr)
library(xts)

# FRED API key stored in a local CSV with a "Key" column
fred_api_key<-read.csv("../data/fredkey.csv",stringsAsFactors=FALSE)
fredr_set_key(fred_api_key$Key)
# 10yr-minus-2yr Treasury spread, monthly, in levels
TenTwo<-fredr(
  series_id = "T10Y2Y",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
# NBER recession indicator: 1 = recession month, 0 otherwise
Recession<-fredr(
  series_id = "USREC",
  frequency = "m", # monthly
  observation_start = as.Date("1977-01-01"),
  observation_end = as.Date("2022-01-01"),
  units = "lin"
)
MyData<-data.frame(TenTwo=TenTwo$value,Recession=Recession$value,row.names = Recession$date)
MyData<-as.xts(MyData)

# Build the target "recession within the next 3 months": negative k in
# lag.xts produces leads, so this pulls the recession flag at t+1, t+2, t+3
USRECLEADS<-lag.xts(MyData$Recession,k=-3:-1)
USRECLEADS$RowSum<-rowSums(USRECLEADS)
# 1 if ANY of the next 3 months is a recession month, else 0
USRECLEADS$RecNext3Mths<-ifelse(USRECLEADS$RowSum>=1,1,0)
MyData$RecNext3Mths<-USRECLEADS$RecNext3Mths
MyData<-na.omit(MyData) # leads leave NAs at the end of the sample


# Logit of the forward-looking recession flag on the CURRENT spread
logit1<-glm(RecNext3Mths~TenTwo,data=MyData,family = 'binomial')
summary(logit1)
## 
## Call:
## glm(formula = RecNext3Mths ~ TenTwo, family = "binomial", data = MyData)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8367  -0.5824  -0.4896  -0.3788   2.3567  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.5011     0.1603  -9.364  < 2e-16 ***
## TenTwo       -0.5134     0.1489  -3.448 0.000565 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 415.98  on 537  degrees of freedom
## Residual deviance: 403.50  on 536  degrees of freedom
## AIC: 407.5
## 
## Number of Fisher Scoring iterations: 5